Added caching of the RNGLR table. It is automatically regenerated whenever the grammar changes. Right now it has cut the time to compile the test file from 30 seconds to less than one second.
This commit is contained in:
41
Kraken_Compiled_Grammer_file_format.txt
Normal file
41
Kraken_Compiled_Grammer_file_format.txt
Normal file
@@ -0,0 +1,41 @@
|
||||
Kraken Compiled Grammer file format (.kgm.comp)
|
||||
|
||||
This file is generated on first run, and regenerated every time the grammar changes.
|
||||
It contains the RNGLR table generated from the specified grammar so that it does not
|
||||
have to be remade every time Kraken is run, saving a lot of time.
|
||||
(at time of writing, non-cached: ~30 seconds, cached: <1 second)
|
||||
|
||||
|
||||
This is a binary format. The first four bytes are a magic number (KRAK in ASCII).
|
||||
|
||||
The next sizeof(int) bytes are an integer indicating how many characters follow, including the null terminator.
|
||||
Next are those characters: the contents of the grammar file as one long, null-terminated string.
|
||||
|
||||
Next is the parse table length, followed by the table itself, exported with the table's export method.
|
||||
It can be imported with the import method.
|
||||
Note that within the parse table's data are parse actions, and within that, Symbols.
|
||||
|
||||
The format: (more or less)
|
||||
____________________
|
||||
|KRAK
|
||||
|length_of_grammer_text
|
||||
|GRAMMER_TEXT
|
||||
|PARSE_TABLE
|
||||
|-|length_of_symbol_index_vector
|
||||
|-|SYMBOL_INDEX_VECTOR
|
||||
|-|length_of_out_table_vector
|
||||
|-|OUT_TABLE_VECTOR
|
||||
|-|-|length_of_mid_table_vector
|
||||
|-|-|MID_TABLE_VECTOR
|
||||
|-|-|-|length_of_in_table_vector
|
||||
|-|-|-|IN_TABLE_VECTOR
|
||||
|-|-|-|-|length_of_parse_action
|
||||
|-|-|-|-|PARSE_ACTION
|
||||
|-|-|-|-|-|ActionType
|
||||
|-|-|-|-|-|ParseRule__if_exists
|
||||
|-|-|-|-|-|-|pointerIndex
|
||||
|-|-|-|-|-|-|Symbol_left_handle
|
||||
|-|-|-|-|-|-|rightside_vector_symbol
|
||||
|-|-|-|-|-|shiftState
|
||||
____________________
|
||||
|
||||
@@ -32,6 +32,8 @@ class Parser {
|
||||
virtual std::string grammerToDOT();
|
||||
|
||||
std::string tableToString();
|
||||
void exportTable(std::ofstream &file);
|
||||
void importTable(char* tableData);
|
||||
|
||||
protected:
|
||||
std::vector<Symbol>* firstSet(Symbol token);
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
#include <fstream>
|
||||
|
||||
#include "util.h"
|
||||
#include "ParseRule.h"
|
||||
#include "ParseAction.h"
|
||||
@@ -11,6 +13,8 @@ class Table {
|
||||
public:
|
||||
Table();
|
||||
~Table();
|
||||
void exportTable(std::ofstream &file);
|
||||
void importTable(char* tableData);
|
||||
void setSymbols(Symbol EOFSymbol, Symbol nullSymbol);
|
||||
void add(int stateNum, Symbol tranSymbol, ParseAction* action);
|
||||
void remove(int stateNum, Symbol tranSymbol);
|
||||
|
||||
68
main.cpp
68
main.cpp
@@ -3,6 +3,8 @@
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "NodeTree.h"
|
||||
#include "Symbol.h"
|
||||
#include "Lexer.h"
|
||||
@@ -28,7 +30,7 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
|
||||
std::ifstream programInFile, grammerInFile, compiledGrammerInFile;
|
||||
std::ofstream outFile, outFileTransformed, outFileAST, outFileC;
|
||||
std::ofstream outFile, outFileTransformed, outFileAST, outFileC, compiledGrammerOutFile;
|
||||
|
||||
programInFile.open(argv[1]);
|
||||
if (!programInFile.is_open()) {
|
||||
@@ -38,18 +40,18 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
std::string grammerFileString = argv[2];
|
||||
|
||||
// compiledGrammerInFile.open(grammerFileString + ".comp");
|
||||
// if (!compiledGrammerInFile.is_open()) {
|
||||
// std::cout << "Problem opening compiledGrammerInFile " << grammerFileString + ".comp" << "\n";
|
||||
// return(1);
|
||||
// }
|
||||
|
||||
grammerInFile.open(grammerFileString);
|
||||
if (!grammerInFile.is_open()) {
|
||||
std::cout << "Problem opening grammerInFile " << grammerFileString << "\n";
|
||||
return(1);
|
||||
}
|
||||
|
||||
compiledGrammerInFile.open(grammerFileString + ".comp", std::ios::binary | std::ios::ate);
|
||||
if (!compiledGrammerInFile.is_open()) {
|
||||
std::cout << "Problem opening compiledGrammerInFile " << grammerFileString + ".comp" << "\n";
|
||||
//return(1);
|
||||
}
|
||||
|
||||
outFile.open(argv[3]);
|
||||
if (!outFile.is_open()) {
|
||||
std::cout << "Probelm opening output file " << argv[3] << "\n";
|
||||
@@ -93,7 +95,57 @@ int main(int argc, char* argv[]) {
|
||||
parser.loadGrammer(grammerInputFileString);
|
||||
//std::cout << "Creating State Set from Main" << std::endl;
|
||||
std::cout << "\nState Set" << std::endl;
|
||||
parser.createStateSet();
|
||||
|
||||
//Start binary stuff
|
||||
bool compGramGood = false;
|
||||
if (compiledGrammerInFile.is_open()) {
|
||||
std::cout << "Compiled grammer file exists, reading it in" << std::endl;
|
||||
std::streampos compGramSize = compiledGrammerInFile.tellg();
|
||||
char* binaryTablePointer = new char [compGramSize];
|
||||
compiledGrammerInFile.seekg(0, std::ios::beg);
|
||||
compiledGrammerInFile.read(binaryTablePointer, compGramSize);
|
||||
compiledGrammerInFile.close();
|
||||
if (binaryTablePointer[0] == 'K' && binaryTablePointer[1] == 'R' && binaryTablePointer[2] == 'A' && binaryTablePointer[3] == 'K') {
|
||||
std::cout << "Valid Kraken Compiled Grammer File" << std::endl;
|
||||
int gramStringLength = *((int*)(binaryTablePointer+4));
|
||||
std::cout << "The grammer string is stored to be " << gramStringLength << " characters long, gramString is "
|
||||
<< grammerInputFileString.length() << " long. Remember 1 extra for null terminator!" << std::endl;
|
||||
if (grammerInputFileString.length() != gramStringLength-1 ||
|
||||
(strncmp(grammerInputFileString.c_str(), (binaryTablePointer+4+sizeof(int)), gramStringLength) != 0)) {
|
||||
//(one less for null terminator that is stored)
|
||||
std::cout << "The Grammer has been changed, will re-create" << std::endl;
|
||||
} else {
|
||||
compGramGood = true;
|
||||
std::cout << "grammer file good" << std::endl;
|
||||
//int tableLength = *((int*)(binaryTablePointer + 4 + sizeof(int) + gramStringLength));
|
||||
parser.importTable(binaryTablePointer + 4 + sizeof(int) + gramStringLength); //Load table starting at the table section
|
||||
}
|
||||
} else {
|
||||
std::cout << grammerFileString << ".comp is NOT A Valid Kraken Compiled Grammer File, aborting" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
delete binaryTablePointer;
|
||||
}
|
||||
|
||||
if (!compGramGood) {
|
||||
//The load failed because either the file does not exist or it is not up-to-date.
|
||||
std::cout << "Compiled grammer file does not exist or is not up-to-date, generating table and writing it out" << std::endl;
|
||||
compiledGrammerOutFile.open(grammerFileString + ".comp", std::ios::binary);
|
||||
if (!compiledGrammerOutFile.is_open())
|
||||
std::cout << "Could not open compiled file to write either!" << std::endl;
|
||||
compiledGrammerOutFile.write("KRAK", sizeof(char)*4);
|
||||
int* intBuffer = new int;
|
||||
*intBuffer = grammerInputFileString.length()+1;
|
||||
compiledGrammerOutFile.write((char*)intBuffer, sizeof(int));
|
||||
delete intBuffer;
|
||||
compiledGrammerOutFile.write(grammerInputFileString.c_str(), grammerInputFileString.length()+1); //Don't forget null terminator
|
||||
|
||||
parser.createStateSet();
|
||||
parser.exportTable(compiledGrammerOutFile);
|
||||
compiledGrammerOutFile.close();
|
||||
}
|
||||
//End binary stuff
|
||||
|
||||
//std::cout << "finished State Set from Main" << std::endl;
|
||||
//std::cout << "Doing stateSetToString from Main" << std::endl;
|
||||
// std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl;
|
||||
|
||||
@@ -7,6 +7,16 @@ Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true), invalid
|
||||
Parser::~Parser() {
|
||||
}
|
||||
|
||||
/**
 * Serialize this parser's parse table to `file`.
 *
 * The parser adds nothing of its own here: the call is forwarded straight to
 * the owned Table, which writes its data to the stream.
 *
 * @param file  output stream to write the table into.
 */
void Parser::exportTable(std::ofstream &file) {
	table.exportTable(file);
}
|
||||
/**
 * Load this parser's parse table from an in-memory buffer.
 *
 * The pointer is handed unchanged to the owned Table, which decodes it.
 *
 * @param tableData  pointer to the first byte of the serialized table.
 */
void Parser::importTable(char* tableData) {
	table.importTable(tableData);
}
|
||||
|
||||
Symbol Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) {
|
||||
Symbol symbol;
|
||||
std::pair<std::string, bool> entry = std::make_pair(symbolString, isTerminal);
|
||||
|
||||
197
src/Table.cpp
197
src/Table.cpp
@@ -8,6 +8,203 @@ Table::~Table() {
|
||||
//
|
||||
}
|
||||
|
||||
void Table::exportTable(std::ofstream &file) {
|
||||
//Save symbolIndexVec
|
||||
int size = symbolIndexVec.size();
|
||||
file.write((char*)&size, sizeof(int));
|
||||
for (int i = 0; i < symbolIndexVec.size(); i++) {
|
||||
//Save the name
|
||||
std::string symbolName = symbolIndexVec[i].getName(); //Get the string
|
||||
size = symbolName.size()+1;
|
||||
file.write((char*)&size, sizeof(int)); //Save size of string
|
||||
file.write((char*)(symbolName.c_str()), size); //Save the string
|
||||
|
||||
//Save the value
|
||||
std::string symbolValue = symbolIndexVec[i].getValue(); //Get the string
|
||||
size = symbolValue.size()+1;
|
||||
file.write((char*)&size, sizeof(int)); //Save size of string
|
||||
file.write((char*)(symbolValue.c_str()), size); //Save the string
|
||||
|
||||
bool isTerminal = symbolIndexVec[i].isTerminal();
|
||||
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
|
||||
}
|
||||
|
||||
//Save the actual table
|
||||
size = table.size();
|
||||
file.write((char*)&size, sizeof(int));
|
||||
for (int i = 0; i < table.size(); i++) {
|
||||
//each item is a middle vector
|
||||
//std::vector< std::vector< std::vector<ParseAction*>* >* > table;
|
||||
std::vector< std::vector<ParseAction*>* >* middleVector = table[i];
|
||||
int middleVectorSize = middleVector->size();
|
||||
file.write((char*)&middleVectorSize, sizeof(int));
|
||||
|
||||
for (int j = 0; j < middleVectorSize; j++) {
|
||||
//each item is an inner vector
|
||||
std::vector<ParseAction*>* innerVector = (*middleVector)[j];
|
||||
int innerVectorSize = 0;
|
||||
if (innerVector)
|
||||
innerVectorSize = innerVector->size();
|
||||
else
|
||||
innerVectorSize = 0;
|
||||
file.write((char*)&innerVectorSize, sizeof(int));
|
||||
|
||||
for (int k = 0; k < innerVectorSize; k++) {
|
||||
//Save the type
|
||||
ParseAction* toSave = (*innerVector)[k];
|
||||
ParseAction::ActionType actionType = toSave->action;
|
||||
file.write((char*)&actionType, sizeof(ParseAction::ActionType));
|
||||
//Save the reduce rule if necessary
|
||||
if (actionType == ParseAction::REDUCE) {
|
||||
//Save the reduce rule
|
||||
ParseRule* rule = toSave->reduceRule;
|
||||
//int pointer index
|
||||
int ptrIndx = rule->getIndex();
|
||||
file.write((char*)&ptrIndx, sizeof(int));
|
||||
|
||||
//Symbol leftHandle
|
||||
Symbol leftHandle = rule->getLeftSide();
|
||||
//Save the name
|
||||
std::string symbolName = leftHandle.getName(); //Get the string
|
||||
size = symbolName.size()+1;
|
||||
file.write((char*)&size, sizeof(int)); //Save size of string
|
||||
file.write((char*)(symbolName.c_str()), size); //Save the string
|
||||
|
||||
//Save the value
|
||||
std::string symbolValue = leftHandle.getValue(); //Get the string
|
||||
size = symbolValue.size()+1;
|
||||
file.write((char*)&size, sizeof(int)); //Save size of string
|
||||
file.write((char*)(symbolValue.c_str()), size); //Save the string
|
||||
|
||||
bool isTerminal = leftHandle.isTerminal();
|
||||
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
|
||||
|
||||
//std::vector<Symbol>* lookahead;
|
||||
//Should not need
|
||||
|
||||
//std::vector<Symbol> rightSide;
|
||||
std::vector<Symbol> rightSide = rule->getRightSide();
|
||||
size = rightSide.size();
|
||||
std::cout << leftHandle.toString() << std::endl;
|
||||
file.write((char*)&size, sizeof(int));
|
||||
for (int l = 0; l < rightSide.size(); l++) {
|
||||
//Save the name
|
||||
symbolName = rightSide[l].getName(); //Get the string
|
||||
size = symbolName.size()+1;
|
||||
file.write((char*)&size, sizeof(int)); //Save size of string
|
||||
file.write((char*)(symbolName.c_str()), size); //Save the string
|
||||
//
|
||||
//Save the value
|
||||
symbolValue = rightSide[l].getValue(); //Get the string
|
||||
size = symbolValue.size()+1;
|
||||
file.write((char*)&size, sizeof(int)); //Save size of string
|
||||
file.write((char*)(symbolValue.c_str()), size); //Save the string
|
||||
//
|
||||
isTerminal = rightSide[l].isTerminal();
|
||||
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
|
||||
}
|
||||
}
|
||||
int shiftState = toSave->shiftState;
|
||||
file.write((char*)&shiftState, sizeof(int));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void Table::importTable(char* tableData) {
|
||||
//Load symbolIndexVec
|
||||
|
||||
int size = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
for (int i = 0; i < size; i++) {
|
||||
int stringLen = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
std::string symbolName = std::string(tableData);
|
||||
tableData += stringLen*sizeof(char);
|
||||
stringLen = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
std::string symbolValue = std::string(tableData);
|
||||
tableData += stringLen*sizeof(char);
|
||||
|
||||
bool isTerminal = *((bool*)tableData);
|
||||
tableData += sizeof(bool);
|
||||
|
||||
symbolIndexVec.push_back(Symbol(symbolName, isTerminal, symbolValue));
|
||||
}
|
||||
|
||||
//Now for the actual table
|
||||
int tableSize = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
for (int i = 0; i < tableSize; i++) {
|
||||
//each item is a middle vector
|
||||
std::vector< std::vector<ParseAction*>* >* middleVector = new std::vector< std::vector<ParseAction*>* >();
|
||||
table.push_back(middleVector);
|
||||
|
||||
int middleVectorSize = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
for (int j = 0; j < middleVectorSize; j++) {
|
||||
//each item is an inner vector
|
||||
std::vector<ParseAction*>* innerVector = new std::vector<ParseAction*>();
|
||||
middleVector->push_back(innerVector);
|
||||
int innerVectorSize = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
for (int k = 0; k < innerVectorSize; k++) {
|
||||
//each item is a ParseRule
|
||||
ParseAction::ActionType action = *((ParseAction::ActionType*)tableData);
|
||||
tableData += sizeof(ParseAction::ActionType);
|
||||
//If reduce, import the reduce rule
|
||||
ParseRule* reduceRule = NULL;
|
||||
if (action == ParseAction::REDUCE) {
|
||||
int ptrIndx = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
|
||||
size = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
std::string leftHandleName = std::string(tableData);
|
||||
tableData += size*sizeof(char);
|
||||
size = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
std::string leftHandleValue = std::string(tableData);
|
||||
tableData += size*sizeof(char);
|
||||
|
||||
bool isTerminal = *((bool*)tableData);
|
||||
tableData += sizeof(bool);
|
||||
|
||||
//right side
|
||||
std::vector<Symbol> rightSide;
|
||||
size = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
for (int l = 0; l < size; l++) {
|
||||
int inStringLen = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
std::string inSymbolName = std::string(tableData);
|
||||
tableData += inStringLen*sizeof(char);
|
||||
|
||||
inStringLen = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
std::string inSymbolValue = std::string(tableData);
|
||||
tableData += inStringLen*sizeof(char);
|
||||
|
||||
bool inIsTerminal = *((bool*)tableData);
|
||||
tableData += sizeof(bool);
|
||||
rightSide.push_back(Symbol(inSymbolName, inIsTerminal, inSymbolValue));
|
||||
}
|
||||
reduceRule = new ParseRule(Symbol(leftHandleName, isTerminal, leftHandleValue), ptrIndx, rightSide, NULL);
|
||||
}
|
||||
int shiftState = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
|
||||
//And push the new action back
|
||||
if (reduceRule)
|
||||
innerVector->push_back(new ParseAction(action, reduceRule));
|
||||
else
|
||||
innerVector->push_back(new ParseAction(action, shiftState));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Table::setSymbols(Symbol EOFSymbol, Symbol nullSymbol) {
|
||||
this->EOFSymbol = EOFSymbol;
|
||||
this->nullSymbol = nullSymbol;
|
||||
|
||||
Reference in New Issue
Block a user