Added caching of the RNGLR table. It is automatically regenerated whenever the grammar changes. Right now it has reduced the time to compile the test file from 30 seconds to less than one second.

This commit is contained in:
Nathan Braswell
2013-12-31 02:53:52 -06:00
parent 3455348951
commit 005659b7b7
6 changed files with 314 additions and 8 deletions

View File

@@ -0,0 +1,41 @@
Kraken Compiled Grammar file format (.kgm.comp)
This file is generated on first run, and regenerated every time the grammar changes.
It contains the RNGLR table generated from the specified grammar so that it does not
have to be remade every time Kraken is run, saving a lot of time.
(at time of writing, non-cached: ~30 seconds, cached: <1 second)
This is a binary format. The first bytes are a magic number (KRAK in ASCII).
The next bytes are an unsigned integer indicating how many characters follow (this count includes the string's null terminator).
Next are these characters, which are the grammar file as one long string.
Next is the parse table length, followed by the table itself, exported with the table's export method.
It can be imported with the import method.
Note that within the parse table's data are parse actions, and within those, Symbols.
The format: (more or less)
____________________
|KRAK
|length_of_grammar_text
|GRAMMAR_TEXT
|PARSE_TABLE
|-|length_of_symbol_index_vector
|-|SYMBOL_INDEX_VECTOR
|-|length_of_out_table_vector
|-|OUT_TABLE_VECTOR
|-|-|length_of_mid_table_vector
|-|-|MID_TABLE_VECTOR
|-|-|-|length_of_in_table_vector
|-|-|-|IN_TABLE_VECTOR
|-|-|-|-|length_of_parse_action
|-|-|-|-|PARSE_ACTION
|-|-|-|-|-|ActionType
|-|-|-|-|-|ParseRule__if_exists
|-|-|-|-|-|-|pointerIndex
|-|-|-|-|-|-|Symbol_left_handle
|-|-|-|-|-|-|rightside_vector_symbol
|-|-|-|-|-|shiftState
____________________

View File

@@ -32,6 +32,8 @@ class Parser {
virtual std::string grammerToDOT();
std::string tableToString();
void exportTable(std::ofstream &file);
void importTable(char* tableData);
protected:
std::vector<Symbol>* firstSet(Symbol token);

View File

@@ -1,3 +1,5 @@
#include <fstream>
#include "util.h"
#include "ParseRule.h"
#include "ParseAction.h"
@@ -11,6 +13,8 @@ class Table {
public:
Table();
~Table();
void exportTable(std::ofstream &file);
void importTable(char* tableData);
void setSymbols(Symbol EOFSymbol, Symbol nullSymbol);
void add(int stateNum, Symbol tranSymbol, ParseAction* action);
void remove(int stateNum, Symbol tranSymbol);

View File

@@ -3,6 +3,8 @@
#include <fstream>
#include <vector>
#include <cstring>
#include "NodeTree.h"
#include "Symbol.h"
#include "Lexer.h"
@@ -28,7 +30,7 @@ int main(int argc, char* argv[]) {
}
std::ifstream programInFile, grammerInFile, compiledGrammerInFile;
std::ofstream outFile, outFileTransformed, outFileAST, outFileC;
std::ofstream outFile, outFileTransformed, outFileAST, outFileC, compiledGrammerOutFile;
programInFile.open(argv[1]);
if (!programInFile.is_open()) {
@@ -38,18 +40,18 @@ int main(int argc, char* argv[]) {
std::string grammerFileString = argv[2];
// compiledGrammerInFile.open(grammerFileString + ".comp");
// if (!compiledGrammerInFile.is_open()) {
// std::cout << "Problem opening compiledGrammerInFile " << grammerFileString + ".comp" << "\n";
// return(1);
// }
grammerInFile.open(grammerFileString);
if (!grammerInFile.is_open()) {
std::cout << "Problem opening grammerInFile " << grammerFileString << "\n";
return(1);
}
compiledGrammerInFile.open(grammerFileString + ".comp", std::ios::binary | std::ios::ate);
if (!compiledGrammerInFile.is_open()) {
std::cout << "Problem opening compiledGrammerInFile " << grammerFileString + ".comp" << "\n";
//return(1);
}
outFile.open(argv[3]);
if (!outFile.is_open()) {
std::cout << "Probelm opening output file " << argv[3] << "\n";
@@ -93,7 +95,57 @@ int main(int argc, char* argv[]) {
parser.loadGrammer(grammerInputFileString);
//std::cout << "Creating State Set from Main" << std::endl;
std::cout << "\nState Set" << std::endl;
parser.createStateSet();
//Start binary stuff
//Reuse the cached RNGLR table stored in "<grammer file>.comp" when its embedded copy of the
//grammer text matches the grammer that was just loaded; otherwise build the table from
//scratch and (re)write the cache file.
bool compGramGood = false;
if (compiledGrammerInFile.is_open()) {
    std::cout << "Compiled grammer file exists, reading it in" << std::endl;
    //The stream was opened with std::ios::ate, so the initial position is the file size.
    std::streamoff compGramSize = compiledGrammerInFile.tellg();
    if (compGramSize < 0)
        compGramSize = 0; //tellg() reports failure as -1
    //A std::vector owns the buffer so it is released on every path out of this block
    //(including the early return below) instead of relying on a manual delete.
    std::vector<char> binaryTable(static_cast<size_t>(compGramSize));
    compiledGrammerInFile.seekg(0, std::ios::beg);
    if (!binaryTable.empty())
        compiledGrammerInFile.read(&binaryTable[0], compGramSize);
    compiledGrammerInFile.close();
    const size_t headerSize = 4 + sizeof(int); //magic number + grammer string length
    //Only look at the magic number if the file is big enough to contain the whole header.
    if (binaryTable.size() >= headerSize &&
        binaryTable[0] == 'K' && binaryTable[1] == 'R' && binaryTable[2] == 'A' && binaryTable[3] == 'K') {
        std::cout << "Valid Kraken Compiled Grammer File" << std::endl;
        char* binaryTablePointer = &binaryTable[0];
        int gramStringLength = 0;
        std::memcpy(&gramStringLength, binaryTablePointer+4, sizeof(int));
        std::cout << "The grammer string is stored to be " << gramStringLength << " characters long, gramString is "
            << grammerInputFileString.length() << " long. Remember 1 extra for null terminator!" << std::endl;
        //The cache is current only if the stored text (whose length includes its null
        //terminator) fits inside the file and is identical to the loaded grammer.
        const bool grammerMatches = gramStringLength > 0
            && grammerInputFileString.length() == static_cast<size_t>(gramStringLength) - 1
            && binaryTable.size() - headerSize >= static_cast<size_t>(gramStringLength)
            && strncmp(grammerInputFileString.c_str(), binaryTablePointer + headerSize, gramStringLength) == 0;
        if (!grammerMatches) {
            std::cout << "The Grammer has been changed, will re-create" << std::endl;
        } else {
            compGramGood = true;
            std::cout << "grammer file good" << std::endl;
            //NOTE(review): importTable receives no length, so a cache file truncated inside
            //the table section is still not detected here.
            parser.importTable(binaryTablePointer + headerSize + gramStringLength); //Load table starting at the table section
        }
    } else {
        std::cout << grammerFileString << ".comp is NOT A Valid Kraken Compiled Grammer File, aborting" << std::endl;
        return -1;
    }
}
if (!compGramGood) {
    //The load failed because either the file does not exist or it is not up-to-date.
    std::cout << "Compiled grammer file does not exist or is not up-to-date, generating table and writing it out" << std::endl;
    //Build the table before opening the output file, so the existing cache file is not
    //truncated until there is a finished table to write into it.
    parser.createStateSet();
    compiledGrammerOutFile.open(grammerFileString + ".comp", std::ios::binary);
    if (!compiledGrammerOutFile.is_open()) {
        //Not fatal: the table was still generated, it just cannot be cached.
        std::cout << "Could not open compiled file to write either!" << std::endl;
    } else {
        compiledGrammerOutFile.write("KRAK", sizeof(char)*4);
        int gramStringLength = grammerInputFileString.length()+1; //Don't forget null terminator
        compiledGrammerOutFile.write((char*)&gramStringLength, sizeof(int));
        compiledGrammerOutFile.write(grammerInputFileString.c_str(), gramStringLength);
        parser.exportTable(compiledGrammerOutFile);
        compiledGrammerOutFile.close();
    }
}
//End binary stuff
//std::cout << "finished State Set from Main" << std::endl;
//std::cout << "Doing stateSetToString from Main" << std::endl;
// std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl;

View File

@@ -7,6 +7,16 @@ Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true), invalid
//Destructor; currently performs no cleanup of its own.
Parser::~Parser() {
}
//Writes the parse table to the given output stream.
//All of the work is forwarded to the `table` member's exportTable.
void Parser::exportTable(std::ofstream &file)
{
    table.exportTable(file);
}
//Loads the parse table from an in-memory buffer.
//tableData must point at the start of the table section of the compiled grammer data;
//it is forwarded unchanged to the `table` member's importTable.
void Parser::importTable(char* tableData)
{
    table.importTable(tableData);
}
Symbol Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) {
Symbol symbol;
std::pair<std::string, bool> entry = std::make_pair(symbolString, isTerminal);

View File

@@ -8,6 +8,203 @@ Table::~Table() {
//
}
void Table::exportTable(std::ofstream &file) {
//Save symbolIndexVec
int size = symbolIndexVec.size();
file.write((char*)&size, sizeof(int));
for (int i = 0; i < symbolIndexVec.size(); i++) {
//Save the name
std::string symbolName = symbolIndexVec[i].getName(); //Get the string
size = symbolName.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolName.c_str()), size); //Save the string
//Save the value
std::string symbolValue = symbolIndexVec[i].getValue(); //Get the string
size = symbolValue.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolValue.c_str()), size); //Save the string
bool isTerminal = symbolIndexVec[i].isTerminal();
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
}
//Save the actual table
size = table.size();
file.write((char*)&size, sizeof(int));
for (int i = 0; i < table.size(); i++) {
//each item is a middle vector
//std::vector< std::vector< std::vector<ParseAction*>* >* > table;
std::vector< std::vector<ParseAction*>* >* middleVector = table[i];
int middleVectorSize = middleVector->size();
file.write((char*)&middleVectorSize, sizeof(int));
for (int j = 0; j < middleVectorSize; j++) {
//each item is an inner vector
std::vector<ParseAction*>* innerVector = (*middleVector)[j];
int innerVectorSize = 0;
if (innerVector)
innerVectorSize = innerVector->size();
else
innerVectorSize = 0;
file.write((char*)&innerVectorSize, sizeof(int));
for (int k = 0; k < innerVectorSize; k++) {
//Save the type
ParseAction* toSave = (*innerVector)[k];
ParseAction::ActionType actionType = toSave->action;
file.write((char*)&actionType, sizeof(ParseAction::ActionType));
//Save the reduce rule if necessary
if (actionType == ParseAction::REDUCE) {
//Save the reduce rule
ParseRule* rule = toSave->reduceRule;
//int pointer index
int ptrIndx = rule->getIndex();
file.write((char*)&ptrIndx, sizeof(int));
//Symbol leftHandle
Symbol leftHandle = rule->getLeftSide();
//Save the name
std::string symbolName = leftHandle.getName(); //Get the string
size = symbolName.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolName.c_str()), size); //Save the string
//Save the value
std::string symbolValue = leftHandle.getValue(); //Get the string
size = symbolValue.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolValue.c_str()), size); //Save the string
bool isTerminal = leftHandle.isTerminal();
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
//std::vector<Symbol>* lookahead;
//Should not need
//std::vector<Symbol> rightSide;
std::vector<Symbol> rightSide = rule->getRightSide();
size = rightSide.size();
std::cout << leftHandle.toString() << std::endl;
file.write((char*)&size, sizeof(int));
for (int l = 0; l < rightSide.size(); l++) {
//Save the name
symbolName = rightSide[l].getName(); //Get the string
size = symbolName.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolName.c_str()), size); //Save the string
//
//Save the value
symbolValue = rightSide[l].getValue(); //Get the string
size = symbolValue.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolValue.c_str()), size); //Save the string
//
isTerminal = rightSide[l].isTerminal();
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
}
}
int shiftState = toSave->shiftState;
file.write((char*)&shiftState, sizeof(int));
}
}
}
}
void Table::importTable(char* tableData) {
//Load symbolIndexVec
int size = *((int*)tableData);
tableData += sizeof(int);
for (int i = 0; i < size; i++) {
int stringLen = *((int*)tableData);
tableData += sizeof(int);
std::string symbolName = std::string(tableData);
tableData += stringLen*sizeof(char);
stringLen = *((int*)tableData);
tableData += sizeof(int);
std::string symbolValue = std::string(tableData);
tableData += stringLen*sizeof(char);
bool isTerminal = *((bool*)tableData);
tableData += sizeof(bool);
symbolIndexVec.push_back(Symbol(symbolName, isTerminal, symbolValue));
}
//Now for the actual table
int tableSize = *((int*)tableData);
tableData += sizeof(int);
for (int i = 0; i < tableSize; i++) {
//each item is a middle vector
std::vector< std::vector<ParseAction*>* >* middleVector = new std::vector< std::vector<ParseAction*>* >();
table.push_back(middleVector);
int middleVectorSize = *((int*)tableData);
tableData += sizeof(int);
for (int j = 0; j < middleVectorSize; j++) {
//each item is an inner vector
std::vector<ParseAction*>* innerVector = new std::vector<ParseAction*>();
middleVector->push_back(innerVector);
int innerVectorSize = *((int*)tableData);
tableData += sizeof(int);
for (int k = 0; k < innerVectorSize; k++) {
//each item is a ParseRule
ParseAction::ActionType action = *((ParseAction::ActionType*)tableData);
tableData += sizeof(ParseAction::ActionType);
//If reduce, import the reduce rule
ParseRule* reduceRule = NULL;
if (action == ParseAction::REDUCE) {
int ptrIndx = *((int*)tableData);
tableData += sizeof(int);
size = *((int*)tableData);
tableData += sizeof(int);
std::string leftHandleName = std::string(tableData);
tableData += size*sizeof(char);
size = *((int*)tableData);
tableData += sizeof(int);
std::string leftHandleValue = std::string(tableData);
tableData += size*sizeof(char);
bool isTerminal = *((bool*)tableData);
tableData += sizeof(bool);
//right side
std::vector<Symbol> rightSide;
size = *((int*)tableData);
tableData += sizeof(int);
for (int l = 0; l < size; l++) {
int inStringLen = *((int*)tableData);
tableData += sizeof(int);
std::string inSymbolName = std::string(tableData);
tableData += inStringLen*sizeof(char);
inStringLen = *((int*)tableData);
tableData += sizeof(int);
std::string inSymbolValue = std::string(tableData);
tableData += inStringLen*sizeof(char);
bool inIsTerminal = *((bool*)tableData);
tableData += sizeof(bool);
rightSide.push_back(Symbol(inSymbolName, inIsTerminal, inSymbolValue));
}
reduceRule = new ParseRule(Symbol(leftHandleName, isTerminal, leftHandleValue), ptrIndx, rightSide, NULL);
}
int shiftState = *((int*)tableData);
tableData += sizeof(int);
//And push the new action back
if (reduceRule)
innerVector->push_back(new ParseAction(action, reduceRule));
else
innerVector->push_back(new ParseAction(action, shiftState));
}
}
}
}
void Table::setSymbols(Symbol EOFSymbol, Symbol nullSymbol) {
this->EOFSymbol = EOFSymbol;
this->nullSymbol = nullSymbol;