2013-05-20 19:34:15 -04:00
|
|
|
#include "Parser.h"
|
|
|
|
|
|
|
|
|
|
// Default constructor. There is no initialiser list and no body, so every
// data member is simply default-constructed.
Parser::Parser()
{
}
|
|
|
|
|
|
|
|
|
|
// Destructor. The body is empty: nothing is released explicitly here.
// NOTE(review): the Symbol, ParseRule and state-vector objects this parser
// stores are created with `new`, and no matching delete happens here --
// confirm who is meant to own them before adding cleanup.
Parser::~Parser()
{
}
|
|
|
|
|
|
2013-05-24 00:00:41 -04:00
|
|
|
// Returns the text form of theInt, produced by streaming the value into an
// in-memory string stream with default formatting.
std::string Parser::intToString(int theInt) {
    std::ostringstream buffer;
    buffer << theInt;
    return buffer.str();
}
|
|
|
|
|
|
2013-05-20 19:34:15 -04:00
|
|
|
// Returns the Symbol registered under symbolString in the symbols map,
// creating and registering a new one on first use.
//
// symbolString - key of the symbol, also passed to the Symbol constructor.
// isTerminal   - passed to the Symbol constructor; only consulted when the
//                symbol is created, an existing entry is returned unchanged.
// Returns the (possibly newly allocated) Symbol stored in the map.
Symbol* Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) {
    // operator[] creates a null entry for an unseen key, so one lookup serves
    // both the "already known" and the "needs inserting" path.
    Symbol*& entry = symbols[symbolString];
    if (entry == NULL) {
        entry = new Symbol(symbolString, isTerminal);
    }
    return entry;
}
|
|
|
|
|
|
|
|
|
|
void Parser::loadGrammer(std::string grammerInputString) {
|
|
|
|
|
reader.setString(grammerInputString);
|
|
|
|
|
|
|
|
|
|
std::string currToken = reader.word();
|
|
|
|
|
|
|
|
|
|
while(currToken != "") {
|
|
|
|
|
//Load the left of the rule
|
|
|
|
|
ParseRule* currentRule = new ParseRule();
|
|
|
|
|
Symbol* leftSide = getOrAddSymbol(currToken, false); //Left handle is never a terminal
|
|
|
|
|
currentRule->setLeftHandle(leftSide);
|
|
|
|
|
reader.word(); //Remove the =
|
|
|
|
|
//Add the right side, adding new Symbols to symbol map.
|
|
|
|
|
currToken = reader.word();
|
|
|
|
|
while (currToken != ";") {
|
|
|
|
|
currentRule->appendToRight(getOrAddSymbol(currToken, currToken.at(0)=='\"')); //If first character is a ", then is a terminal
|
|
|
|
|
currToken = reader.word();
|
|
|
|
|
//If there are multiple endings to this rule, finish this rule and start a new one with same left handle
|
|
|
|
|
if (currToken == "|") {
|
|
|
|
|
loadedGrammer.push_back(currentRule);
|
|
|
|
|
currentRule = new ParseRule();
|
|
|
|
|
currentRule->setLeftHandle(leftSide);
|
|
|
|
|
currToken = reader.word();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
//Add new rule to grammer
|
|
|
|
|
loadedGrammer.push_back(currentRule);
|
|
|
|
|
//Get next token
|
|
|
|
|
currToken = reader.word();
|
|
|
|
|
}
|
|
|
|
|
std::cout << "Parsed!\n";
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-24 00:00:41 -04:00
|
|
|
void Parser::createStateSet() {
|
2013-05-24 13:24:33 -04:00
|
|
|
std::cout << "Begining creation of stateSet" << std::endl;
|
2013-05-24 00:00:41 -04:00
|
|
|
stateSets.push_back( new std::vector<ParseRule*> );
|
|
|
|
|
stateSets[0]->push_back(loadedGrammer[0]);
|
2013-05-24 13:24:33 -04:00
|
|
|
std::cout << "Begining for main set for loop" << std::endl;
|
|
|
|
|
for (std::vector< std::vector<ParseRule*>* >::size_type i = 0; i < stateSets.size(); i++) {
|
|
|
|
|
std::cout << "calling closure" << std::endl;
|
|
|
|
|
closure(stateSets[i]);
|
|
|
|
|
std::cout << "finished closure" << std::endl;
|
|
|
|
|
std::cout << "Starting inner for loop that adds states" << std::endl;
|
|
|
|
|
for (std::vector<ParseRule*>::size_type j = 0; j < stateSets[i]->size(); j++) {
|
|
|
|
|
std::cout << "about to call addState" << std::endl;
|
|
|
|
|
addState(&stateSets, stateSets[i], (*stateSets[i])[j]->getRightSide()[(*stateSets[i])[j]->getIndex()]);
|
|
|
|
|
std::cout << "finished addState" << std::endl;
|
2013-05-24 00:00:41 -04:00
|
|
|
//Closure will be called in the outer loop
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Expands `state` in place: for every rule in it, each grammar rule whose
// left side equals the symbol at that rule's index is appended, unless that
// same grammar rule pointer is already present.
//
// Rules appended here are visited by the same outer loop, so they are
// expanded too. The pointers added are the ones held in loadedGrammer (no
// copies are made).
//
// state - list of rules to expand; modified in place.
void Parser::closure(std::vector<ParseRule*>* state) {
    typedef std::vector<ParseRule*>::size_type RuleIndex;

    // Add all the applicable rules.
    for (RuleIndex i = 0; i < state->size(); i++) {
        ParseRule* rule = (*state)[i];

        // A rule whose index is outside its right side has no symbol to
        // expand; skipping it also avoids an out-of-range read.
        if (static_cast<RuleIndex>(rule->getIndex()) >= rule->getRightSide().size()) {
            continue;
        }
        Symbol* nextSymbol = rule->getRightSide()[rule->getIndex()];

        for (RuleIndex j = 0; j < loadedGrammer.size(); j++) {
            if (nextSymbol != loadedGrammer[j]->getLeftSide()) {
                continue;
            }

            // Check to make sure not already in
            bool isAlreadyInState = false;
            for (RuleIndex k = 0; k < state->size(); k++) {
                if ((*state)[k] == loadedGrammer[j]) {
                    isAlreadyInState = true;
                    break;
                }
            }
            if (!isAlreadyInState) {
                state->push_back(loadedGrammer[j]);
            }
        }
    }
}
|
|
|
|
|
|
|
|
|
|
//Adds state if it doesn't already exist.
|
|
|
|
|
void Parser::addState(std::vector< std::vector<ParseRule*>* >* stateSets, std::vector<ParseRule*>* state, Symbol* symbol) {
|
|
|
|
|
std::vector<std::vector<ParseRule*>* > newStates;
|
|
|
|
|
//For each rule in the state we already have
|
2013-05-24 13:24:33 -04:00
|
|
|
for (std::vector<ParseRule*>::size_type i = 0; i < state->size(); i++) {
|
2013-05-24 00:00:41 -04:00
|
|
|
//Clone the current rule
|
2013-05-24 13:24:33 -04:00
|
|
|
ParseRule* advancedRule = (*state)[i]->clone();
|
2013-05-24 00:00:41 -04:00
|
|
|
//Try to advance the pointer
|
|
|
|
|
if (advancedRule->advancePointer()) {
|
|
|
|
|
//If sucessful, check to see if this the advanced symbol is the basis for any of our new states
|
|
|
|
|
bool symbolAlreadyInState = false;
|
2013-05-24 13:24:33 -04:00
|
|
|
for (std::vector<std::vector<ParseRule*>* >::size_type j = 0; j < newStates.size(); j++) {
|
|
|
|
|
if ((*newStates[j])[0]->getRightSide()[(*newStates[j])[0]->getIndex()] == advancedRule->getRightSide()[advancedRule->getIndex()]) {
|
2013-05-24 00:00:41 -04:00
|
|
|
symbolAlreadyInState = true;
|
|
|
|
|
//So now check to see if this exact rule is in this state
|
|
|
|
|
bool ruleAlreadyInState = false;
|
2013-05-24 13:24:33 -04:00
|
|
|
for (std::vector<ParseRule*>::size_type k = 0; k < newStates[j]->size(); k++) {
|
|
|
|
|
if (*(*newStates[j])[k] == (*advancedRule) ) {
|
2013-05-24 00:00:41 -04:00
|
|
|
ruleAlreadyInState = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!ruleAlreadyInState) {
|
2013-05-24 13:24:33 -04:00
|
|
|
newStates[j]->push_back(advancedRule);
|
2013-05-24 00:00:41 -04:00
|
|
|
}
|
|
|
|
|
//We found a state with the same symbol, so stop searching
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!symbolAlreadyInState) {
|
|
|
|
|
std::vector<ParseRule*>* newState = new std::vector<ParseRule*>;
|
|
|
|
|
newState->push_back(advancedRule);
|
|
|
|
|
newStates.push_back(newState);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
//Put all our new states in the set of states
|
2013-05-24 13:24:33 -04:00
|
|
|
for (std::vector< std::vector<ParseRule*> * >::size_type i = 0; i < newStates.size(); i++) {
|
|
|
|
|
stateSets->push_back(newStates[i]);
|
2013-05-24 00:00:41 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string Parser::stateSetToString() {
|
|
|
|
|
std::string concat = "";
|
|
|
|
|
int currentNum = 0;
|
2013-05-24 13:24:33 -04:00
|
|
|
for (std::vector< std::vector<ParseRule*> *>::size_type i = 0; i < stateSets.size(); i++) {
|
2013-05-24 00:00:41 -04:00
|
|
|
concat += "State " + intToString(currentNum) + ":\n";
|
2013-05-24 13:24:33 -04:00
|
|
|
for (std::vector<ParseRule*>::size_type j = 0; j < stateSets[i]->size(); j++) {
|
|
|
|
|
concat += "\t" + (*stateSets[i])[j]->toString() + "\n";
|
2013-05-24 00:00:41 -04:00
|
|
|
}
|
|
|
|
|
concat += "\n";
|
2013-05-24 13:24:33 -04:00
|
|
|
currentNum++;
|
2013-05-24 00:00:41 -04:00
|
|
|
}
|
|
|
|
|
return concat;
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-23 01:35:54 -04:00
|
|
|
// Goto-table lookup. Placeholder: both arguments are ignored and 0 is
// returned for every input.
int Parser::gotoTable(int state, Symbol* token) {
    (void)state;
    (void)token;
    return 0;
}
|
|
|
|
|
|
|
|
|
|
// Action-table lookup. Placeholder: both arguments are ignored and a null
// pointer is returned for every input, so callers receive no action.
ParseAction* Parser::actionTable(int state, Symbol* token) {
    (void)state;
    (void)token;
    return NULL;
}
|
|
|
|
|
|
|
|
|
|
void Parser::parseInput(std::string inputString) {
|
|
|
|
|
StringReader inputReader;
|
|
|
|
|
inputReader.setString(inputString);
|
|
|
|
|
|
|
|
|
|
Symbol* token = new Symbol(reader.word(), false);
|
|
|
|
|
ParseAction* action;
|
|
|
|
|
|
|
|
|
|
stateStack.push(0);
|
|
|
|
|
symbolStack.push(new Symbol("INVALID", false));
|
|
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
action = actionTable(stateStack.top(), token);
|
|
|
|
|
switch (action->action) {
|
|
|
|
|
case ParseAction::REDUCE:
|
|
|
|
|
{
|
|
|
|
|
int rightSideLength = action->reduceRule->getRightSide().size();
|
|
|
|
|
for (int i = 0; i < rightSideLength; i++) {
|
|
|
|
|
stateStack.pop();
|
|
|
|
|
symbolStack.pop();
|
|
|
|
|
}
|
|
|
|
|
symbolStack.push(action->reduceRule->getLeftSide());
|
|
|
|
|
stateStack.push(gotoTable(stateStack.top(), symbolStack.top()));
|
|
|
|
|
std::cout << "Reduce by " << action->reduceRule->toString() << std::endl;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case ParseAction::SHIFT:
|
|
|
|
|
symbolStack.push(token);
|
|
|
|
|
token = new Symbol(inputReader.word(), false);
|
|
|
|
|
stateStack.push(action->shiftState);
|
|
|
|
|
std::cout << "Shift " << symbolStack.top()->toString() << std::endl;
|
|
|
|
|
break;
|
|
|
|
|
case ParseAction::ACCEPT:
|
|
|
|
|
std::cout << "ACCEPTED!" << std::endl;
|
|
|
|
|
return;
|
|
|
|
|
break;
|
|
|
|
|
case ParseAction::REJECT:
|
|
|
|
|
std::cout << "REJECTED!" << std::endl;
|
|
|
|
|
return;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-20 19:34:15 -04:00
|
|
|
std::string Parser::grammerToString() {
|
|
|
|
|
//Iterate through the vector, adding string representation of each grammer rule
|
|
|
|
|
std::cout << "About to toString\n";
|
|
|
|
|
std::string concat = "";
|
|
|
|
|
for (int i = 0; i < loadedGrammer.size(); i++) {
|
2013-05-20 22:59:57 -04:00
|
|
|
concat += loadedGrammer[i]->toString() + "\n";
|
2013-05-20 19:34:15 -04:00
|
|
|
}
|
|
|
|
|
return(concat);
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-20 22:59:57 -04:00
|
|
|
std::string Parser::grammerToDOT() {
|
|
|
|
|
//Iterate through the vector, adding DOT representation of each grammer rule
|
|
|
|
|
std::cout << "About to DOT export\n";
|
|
|
|
|
std::string concat = "";
|
|
|
|
|
for (int i = 0; i < loadedGrammer.size(); i++) {
|
|
|
|
|
concat += loadedGrammer[i]->toDOT();
|
|
|
|
|
}
|
|
|
|
|
return("digraph Kraken_Grammer { \n" + concat + "}");
|
|
|
|
|
}
|
|
|
|
|
|