diff --git a/Kraken_Compiled_Grammer_file_format.txt b/Kraken_Compiled_Grammer_file_format.txt new file mode 100644 index 0000000..32a40ac --- /dev/null +++ b/Kraken_Compiled_Grammer_file_format.txt @@ -0,0 +1,41 @@ +Kraken Compiled Grammer file format (.kgm.comp) + +This file is generated on first run, and regenerated every time the grammer changes. +It contains the RNGLR table generated from the specified grammer so that it does not +have to be remade every time Kraken is run, saving a lot of time. +(at time of writing, non-cached: ~30 seconds, cached: <1 second) + + +This is a binary format. The first four bytes are a magic number (KRAK in ASCII). + +The next bytes are an integer (a native int, written as-is) indicating how many characters follow, counting the terminating null. +Next are these characters, which are the grammer file as one long null-terminated string. + +Next is the parse table itself, exported with the table's export method; no separate parse table length is stored. +It can be imported with the import method. +Note that within the parse table's data are parse actions, and within those, Symbols. 
+ +The format: (more or less) +____________________ +|KRAK +|length_of_grammer_text +|GRAMMER_TEXT +|PARSE_TABLE +|-|length_of_symbol_index_vector +|-|SYMBOL_INDEX_VECTOR +|-|length_of_out_table_vector +|-|OUT_TABLE_VECTOR +|-|-|length_of_mid_table_vector +|-|-|MID_TABLE_VECTOR +|-|-|-|length_of_in_table_vector +|-|-|-|IN_TABLE_VECTOR +|-|-|-|-|length_of_parse_action +|-|-|-|-|PARSE_ACTION +|-|-|-|-|-|ActionType +|-|-|-|-|-|ParseRule__if_exists +|-|-|-|-|-|-|pointerIndex +|-|-|-|-|-|-|Symbol_left_handle +|-|-|-|-|-|-|rightside_vector_symbol +|-|-|-|-|-|shiftState +____________________ + diff --git a/include/Parser.h b/include/Parser.h index 0e876d1..6f1cc49 100644 --- a/include/Parser.h +++ b/include/Parser.h @@ -32,6 +32,8 @@ class Parser { virtual std::string grammerToDOT(); std::string tableToString(); + void exportTable(std::ofstream &file); + void importTable(char* tableData); protected: std::vector* firstSet(Symbol token); diff --git a/include/Table.h b/include/Table.h index fbfd3b9..6b8ce88 100644 --- a/include/Table.h +++ b/include/Table.h @@ -1,3 +1,5 @@ +#include + #include "util.h" #include "ParseRule.h" #include "ParseAction.h" @@ -11,6 +13,8 @@ class Table { public: Table(); ~Table(); + void exportTable(std::ofstream &file); + void importTable(char* tableData); void setSymbols(Symbol EOFSymbol, Symbol nullSymbol); void add(int stateNum, Symbol tranSymbol, ParseAction* action); void remove(int stateNum, Symbol tranSymbol); diff --git a/main.cpp b/main.cpp index ae991e1..a6ee9d4 100644 --- a/main.cpp +++ b/main.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include "NodeTree.h" #include "Symbol.h" #include "Lexer.h" @@ -28,7 +30,7 @@ int main(int argc, char* argv[]) { } std::ifstream programInFile, grammerInFile, compiledGrammerInFile; - std::ofstream outFile, outFileTransformed, outFileAST, outFileC; + std::ofstream outFile, outFileTransformed, outFileAST, outFileC, compiledGrammerOutFile; programInFile.open(argv[1]); if (!programInFile.is_open()) { 
@@ -38,18 +40,18 @@ int main(int argc, char* argv[]) {
 
 	std::string grammerFileString = argv[2];
 
-	// compiledGrammerInFile.open(grammerFileString + ".comp");
-	// if (!compiledGrammerInFile.is_open()) {
-	// 	std::cout << "Problem opening compiledGrammerInFile " << grammerFileString + ".comp" << "\n";
-	// 	return(1);
-	// }
-
 	grammerInFile.open(grammerFileString);
 	if (!grammerInFile.is_open()) {
 		std::cout << "Problem opening grammerInFile " << grammerFileString << "\n";
 		return(1);
 	}
 
+	compiledGrammerInFile.open(grammerFileString + ".comp", std::ios::binary | std::ios::ate);
+	if (!compiledGrammerInFile.is_open()) {
+		std::cout << "Problem opening compiledGrammerInFile " << grammerFileString + ".comp" << "\n";
+		//return(1);
+	}
+
 	outFile.open(argv[3]);
 	if (!outFile.is_open()) {
 		std::cout << "Probelm opening output file " << argv[3] << "\n";
@@ -93,7 +95,68 @@ int main(int argc, char* argv[]) {
 	parser.loadGrammer(grammerInputFileString);
 	//std::cout << "Creating State Set from Main" << std::endl;
 	std::cout << "\nState Set" << std::endl;
-	parser.createStateSet();
+
+	//Start binary stuff
+	//Try to reuse the parse table cached in <grammer file>.comp. The cache is only trusted when it
+	//starts with the KRAK magic and embeds exactly the grammer text that was just loaded.
+	bool compGramGood = false;
+	if (compiledGrammerInFile.is_open()) {
+		std::cout << "Compiled grammer file exists, reading it in" << std::endl;
+		//The stream was opened with std::ios::ate, so tellg() reports the file size (-1 on failure)
+		std::streamoff compGramSize = compiledGrammerInFile.tellg();
+		if (compGramSize < 0)
+			compGramSize = 0;
+		char* binaryTablePointer = new char [compGramSize];
+		compiledGrammerInFile.seekg(0, std::ios::beg);
+		compiledGrammerInFile.read(binaryTablePointer, compGramSize);
+		compiledGrammerInFile.close();
+		//The header is the 4 magic bytes followed by the int length of the grammer text
+		const std::streamoff headerSize = 4 + sizeof(int);
+		if (compGramSize >= headerSize && binaryTablePointer[0] == 'K' && binaryTablePointer[1] == 'R'
+				&& binaryTablePointer[2] == 'A' && binaryTablePointer[3] == 'K') {
+			std::cout << "Valid Kraken Compiled Grammer File" << std::endl;
+			int gramStringLength = *((int*)(binaryTablePointer+4));
+			std::cout << "The grammer string is stored to be " << gramStringLength << " characters long, gramString is "
+				<< grammerInputFileString.length() << " long. Remember 1 extra for null terminator!" << std::endl;
+			//The stored text has to fit inside the buffer before it may be compared or skipped over
+			bool storedTextFits = gramStringLength > 0 && gramStringLength <= compGramSize - headerSize;
+			if (!storedTextFits || grammerInputFileString.length() != (std::string::size_type)(gramStringLength-1) ||
+					(strncmp(grammerInputFileString.c_str(), (binaryTablePointer+headerSize), gramStringLength) != 0)) {
+				//(one less for null terminator that is stored)
+				std::cout << "The Grammer has been changed, will re-create" << std::endl;
+			} else {
+				compGramGood = true;
+				std::cout << "grammer file good" << std::endl;
+				parser.importTable(binaryTablePointer + headerSize + gramStringLength); //Load table starting at the table section
+			}
+		} else {
+			std::cout << grammerFileString << ".comp is NOT A Valid Kraken Compiled Grammer File, aborting" << std::endl;
+			delete[] binaryTablePointer;
+			return -1;
+		}
+		delete[] binaryTablePointer; //allocated with new[], so it has to be released with delete[]
+	}
+
+	if (!compGramGood) {
+		//The load failed because either the file does not exist or it is not up-to-date.
+		std::cout << "Compiled grammer file does not exist or is not up-to-date, generating table and writing it out" << std::endl;
+		//Generate the table before opening the output file, so an existing cache is not truncated until there is a table to write
+		parser.createStateSet();
+		compiledGrammerOutFile.open(grammerFileString + ".comp", std::ios::binary);
+		if (!compiledGrammerOutFile.is_open()) {
+			//Nothing can be cached, but the table that was just generated is still used for this run
+			std::cout << "Could not open compiled file to write either!" << std::endl;
+		} else {
+			compiledGrammerOutFile.write("KRAK", sizeof(char)*4);
+			int storedLength = grammerInputFileString.length()+1; //Don't forget null terminator
+			compiledGrammerOutFile.write((char*)&storedLength, sizeof(int));
+			compiledGrammerOutFile.write(grammerInputFileString.c_str(), storedLength);
+			parser.exportTable(compiledGrammerOutFile);
+			compiledGrammerOutFile.close();
+		}
+	}
+	//End binary stuff
+
 	//std::cout << "finished State Set from Main" << std::endl;
 	//std::cout << "Doing stateSetToString from Main" << std::endl;
 	// std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl;
diff --git a/src/Parser.cpp b/src/Parser.cpp
index f8236c5..5133c32 100644
--- a/src/Parser.cpp
+++ b/src/Parser.cpp
@@ -7,6 +7,16 @@ Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true), invalid
 Parser::~Parser() {
 }
 
+//Write this parser's table to file by delegating to Table::exportTable
+void Parser::exportTable(std::ofstream &file) {
+	table.exportTable(file);
+}
+
+//Fill this parser's table from tableData by delegating to Table::importTable
+void Parser::importTable(char* tableData) {
+	table.importTable(tableData);
+}
+
 Symbol Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) {
 	Symbol symbol;
 	std::pair<std::string, bool> entry = std::make_pair(symbolString, isTerminal);
diff --git a/src/Table.cpp b/src/Table.cpp
index a94d6b2..0c8b8fe 100644
--- a/src/Table.cpp
+++ b/src/Table.cpp
@@ -8,6 +8,150 @@ Table::~Table() {
 	//
 }
 
+//Write one int to file exactly as it is laid out in memory
+static void writeInt(std::ofstream &file, int value) {
+	file.write(reinterpret_cast<char*>(&value), sizeof(int));
+}
+
+//Write a string as its length counting the null terminator, then its characters and the terminator
+static void writeString(std::ofstream &file, const std::string &text) {
+	writeInt(file, text.size()+1);
+	file.write(text.c_str(), text.size()+1);
+}
+
+//Write a Symbol as its name, its value, then its isTerminal flag
+static void writeSymbol(std::ofstream &file, Symbol symbol) {
+	writeString(file, symbol.getName());
+	writeString(file, symbol.getValue());
+	bool isTerminal = symbol.isTerminal();
+	file.write(reinterpret_cast<char*>(&isTerminal), sizeof(bool));
+}
+
+//Copy one T out of the buffer byte by byte and move the cursor past it.
+//Fields follow variable-length strings, so the cursor is generally not aligned for T
+//and must not be dereferenced through a T*.
+template <typename T>
+static T readRaw(char* &cursor) {
+	T value;
+	char* valueBytes = reinterpret_cast<char*>(&value);
+	for (unsigned int i = 0; i < sizeof(T); i++)
+		valueBytes[i] = cursor[i];
+	cursor += sizeof(T);
+	return value;
+}
+
+//Read a string written by writeString and move the cursor past its terminator
+static std::string readString(char* &cursor) {
+	int storedLength = readRaw<int>(cursor); //counts the null terminator
+	std::string text(cursor, storedLength > 0 ? storedLength-1 : 0);
+	cursor += storedLength;
+	return text;
+}
+
+//Read a Symbol written by writeSymbol and move the cursor past it
+static Symbol readSymbol(char* &cursor) {
+	std::string name = readString(cursor);
+	std::string value = readString(cursor);
+	bool isTerminal = readRaw<bool>(cursor);
+	return Symbol(name, isTerminal, value);
+}
+
+/*
+ * Write the table to file in this order:
+ *   symbolIndexVec: its size, then each Symbol.
+ *   table: its size; for each middle vector its size; for each inner vector its size
+ *   (0 when the inner vector is NULL); for each ParseAction its ActionType, then for
+ *   REDUCE only the rule (index, left side, right side size and Symbols), then shiftState.
+ * Every value is written as it is laid out in memory. The rule's lookahead is not written;
+ * importTable passes NULL for it.
+ */
+void Table::exportTable(std::ofstream &file) {
+	//Save symbolIndexVec
+	writeInt(file, symbolIndexVec.size());
+	for (unsigned int i = 0; i < symbolIndexVec.size(); i++)
+		writeSymbol(file, symbolIndexVec[i]);
+
+	//Save the actual table
+	writeInt(file, table.size());
+	for (unsigned int i = 0; i < table.size(); i++) {
+		std::vector< std::vector<ParseAction*>* >* middleVector = table[i];
+		writeInt(file, middleVector->size());
+		for (unsigned int j = 0; j < middleVector->size(); j++) {
+			std::vector<ParseAction*>* innerVector = (*middleVector)[j];
+			//A NULL inner vector is written the same way as an empty one
+			int innerVectorSize = innerVector ? innerVector->size() : 0;
+			writeInt(file, innerVectorSize);
+			for (int k = 0; k < innerVectorSize; k++) {
+				ParseAction* toSave = (*innerVector)[k];
+				ParseAction::ActionType actionType = toSave->action;
+				file.write(reinterpret_cast<char*>(&actionType), sizeof(ParseAction::ActionType));
+				if (actionType == ParseAction::REDUCE) {
+					ParseRule* rule = toSave->reduceRule;
+					writeInt(file, rule->getIndex());
+					writeSymbol(file, rule->getLeftSide());
+					//The lookahead is skipped on purpose; only the right side follows
+					std::vector<Symbol> rightSide = rule->getRightSide();
+					writeInt(file, rightSide.size());
+					for (unsigned int l = 0; l < rightSide.size(); l++)
+						writeSymbol(file, rightSide[l]);
+				}
+				writeInt(file, toSave->shiftState);
+			}
+		}
+	}
+}
+
+/*
+ * Read symbolIndexVec and table back from tableData, which must point at bytes written by
+ * exportTable with the same in-memory layout for int, bool and ParseAction::ActionType.
+ * Entries are appended to whatever symbolIndexVec and table already hold. The buffer's size
+ * is not known here, so truncated or corrupt data cannot be detected.
+ */
+void Table::importTable(char* tableData) {
+	//Load symbolIndexVec
+	int symbolCount = readRaw<int>(tableData);
+	for (int i = 0; i < symbolCount; i++)
+		symbolIndexVec.push_back(readSymbol(tableData));
+
+	//Now for the actual table
+	int tableSize = readRaw<int>(tableData);
+	for (int i = 0; i < tableSize; i++) {
+		std::vector< std::vector<ParseAction*>* >* middleVector = new std::vector< std::vector<ParseAction*>* >();
+		table.push_back(middleVector);
+
+		int middleVectorSize = readRaw<int>(tableData);
+		for (int j = 0; j < middleVectorSize; j++) {
+			//NULL inner vectors were written with size 0, so they come back as empty vectors
+			std::vector<ParseAction*>* innerVector = new std::vector<ParseAction*>();
+			middleVector->push_back(innerVector);
+
+			int innerVectorSize = readRaw<int>(tableData);
+			for (int k = 0; k < innerVectorSize; k++) {
+				ParseAction::ActionType action = readRaw<ParseAction::ActionType>(tableData);
+				//Only REDUCE actions are followed by a rule
+				ParseRule* reduceRule = NULL;
+				if (action == ParseAction::REDUCE) {
+					int ptrIndx = readRaw<int>(tableData);
+					Symbol leftHandle = readSymbol(tableData);
+					std::vector<Symbol> rightSide;
+					int rightSideSize = readRaw<int>(tableData);
+					for (int l = 0; l < rightSideSize; l++)
+						rightSide.push_back(readSymbol(tableData));
+					//No lookahead was written, so the rule is rebuilt with NULL for it
+					reduceRule = new ParseRule(leftHandle, ptrIndx, rightSide, NULL);
+				}
+				int shiftState = readRaw<int>(tableData);
+
+				//And push the new action back
+				if (reduceRule)
+					innerVector->push_back(new ParseAction(action, reduceRule));
+				else
+					innerVector->push_back(new ParseAction(action, shiftState));
+			}
+		}
+	}
+}
+
 void Table::setSymbols(Symbol EOFSymbol, Symbol nullSymbol) {
 	this->EOFSymbol = EOFSymbol;
 	this->nullSymbol = nullSymbol;