2013-05-20 19:34:15 -04:00
|
|
|
#include "Parser.h"
|
|
|
|
|
|
|
|
|
|
Parser::Parser() {
    //Nothing to set up explicitly: every member is default-constructed.
}
|
|
|
|
|
|
|
|
|
|
Parser::~Parser() {
    //Intentionally empty.
    //NOTE(review): the methods of this class allocate Symbols, ParseRules, States and
    //table rows with new and nothing is released here - confirm who is meant to own them.
}
|
|
|
|
|
|
|
|
|
|
//Returns the Symbol registered under symbolString, creating and registering a new one
//(with the given terminal flag) when the name has not been seen before.
//isTerminal is only used when a new Symbol has to be created.
Symbol* Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) {
    //Already known: hand back the existing instance
    if (symbols.find(symbolString) != symbols.end())
        return symbols[symbolString];

    //First sighting: create it and remember it under its name
    Symbol* created = new Symbol(symbolString, isTerminal);
    symbols[symbolString] = created;
    return created;
}
|
|
|
|
|
|
|
|
|
|
void Parser::loadGrammer(std::string grammerInputString) {
|
|
|
|
|
reader.setString(grammerInputString);
|
|
|
|
|
|
|
|
|
|
std::string currToken = reader.word();
|
|
|
|
|
|
|
|
|
|
while(currToken != "") {
|
|
|
|
|
//Load the left of the rule
|
|
|
|
|
ParseRule* currentRule = new ParseRule();
|
|
|
|
|
Symbol* leftSide = getOrAddSymbol(currToken, false); //Left handle is never a terminal
|
|
|
|
|
currentRule->setLeftHandle(leftSide);
|
|
|
|
|
reader.word(); //Remove the =
|
|
|
|
|
//Add the right side, adding new Symbols to symbol map.
|
|
|
|
|
currToken = reader.word();
|
|
|
|
|
while (currToken != ";") {
|
|
|
|
|
currentRule->appendToRight(getOrAddSymbol(currToken, currToken.at(0)=='\"')); //If first character is a ", then is a terminal
|
|
|
|
|
currToken = reader.word();
|
|
|
|
|
//If there are multiple endings to this rule, finish this rule and start a new one with same left handle
|
|
|
|
|
if (currToken == "|") {
|
|
|
|
|
loadedGrammer.push_back(currentRule);
|
|
|
|
|
currentRule = new ParseRule();
|
|
|
|
|
currentRule->setLeftHandle(leftSide);
|
|
|
|
|
currToken = reader.word();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
//Add new rule to grammer
|
|
|
|
|
loadedGrammer.push_back(currentRule);
|
|
|
|
|
//Get next token
|
|
|
|
|
currToken = reader.word();
|
|
|
|
|
}
|
|
|
|
|
std::cout << "Parsed!\n";
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-13 19:11:31 -04:00
|
|
|
std::vector<Symbol*>* Parser::firstSet(Symbol* token) {
|
|
|
|
|
std::vector<Symbol*>* first = new std::vector<Symbol*>();
|
|
|
|
|
//First, if the symbol is a terminal, than it's first set is just itself.
|
|
|
|
|
if (token->isTerminal()) {
|
|
|
|
|
first->push_back(token);
|
|
|
|
|
return(first);
|
|
|
|
|
}
|
|
|
|
|
//Otherwise....
|
|
|
|
|
//Ok, to make a first set, go through the grammer, if the token is part of the left side, add it's production's first token's first set.
|
|
|
|
|
//Theoretically, if that one includes mull, do the next one too. However, null productions have not yet been implemented.
|
|
|
|
|
Symbol* rightToken = NULL;
|
|
|
|
|
std::vector<Symbol*>* recursiveFirstSet = NULL;
|
|
|
|
|
for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++) {
|
|
|
|
|
if (*token == *(loadedGrammer[i]->getLeftSide())) {
|
|
|
|
|
rightToken = loadedGrammer[i]->getRightSide()[0]; //Get the first token of the right side of this rule
|
|
|
|
|
if (rightToken->isTerminal())
|
|
|
|
|
first->push_back(rightToken);
|
|
|
|
|
else {
|
|
|
|
|
//Add the entire set
|
|
|
|
|
recursiveFirstSet = firstSet(rightToken);
|
|
|
|
|
first->insert(first->end(), recursiveFirstSet->begin(), recursiveFirstSet->end());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return(first);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Parser::printFirstSets() {
|
|
|
|
|
std::vector<Symbol*>* first = NULL;
|
|
|
|
|
for (std::vector<Symbol*>::size_type i = 0; i < symbolIndexVec.size(); i++) {
|
|
|
|
|
first = firstSet(symbolIndexVec[i]);
|
|
|
|
|
std::cout << "First set of " << symbolIndexVec[i]->toString() << " is: ";
|
|
|
|
|
for (std::vector<Symbol*>::size_type j = 0; j < first->size(); j++)
|
|
|
|
|
std::cout << (*first)[j]->toString() << " ";
|
|
|
|
|
std::cout << std::endl;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-21 14:16:16 -04:00
|
|
|
|
|
|
|
|
//follow set created from grammer instead of an individual state
|
|
|
|
|
//May not be totally correct, but works for now. Should be similar to LALR(1)
|
|
|
|
|
//To avoid infinite recursion, we call a function with an avoid list, adding ourselves to it as we go.
|
|
|
|
|
|
|
|
|
|
//Entry point for the grammar-wide follow set of token: starts gramFollowSetAvoid with an
//empty avoid list. Returns a newly allocated vector that the caller must delete.
std::vector<Symbol*>* Parser::gramFollowSet(Symbol* token) {
    //The avoid list is only needed while the recursion runs, so it lives on the stack
    //instead of being heap-allocated and never freed.
    std::vector<Symbol*> avoidList;
    return gramFollowSetAvoid(token, &avoidList);
}
|
|
|
|
|
//Computes the follow set of token over the whole grammar.
//avoidList holds the symbols already being processed further up the recursion; token is
//added to it, and a token found in it yields an empty set, which stops infinite recursion.
//For each occurrence of token on the right side of a rule:
//  - in the first (goal) rule: a new $EOF$ symbol is added and the rest of that rule is skipped
//  - followed by a terminal: that terminal is added
//  - followed by a non-terminal: the first set of that non-terminal is added
//  - at the end of the rule: the follow set of the rule's left side is added
//Theoretically, if a first set includes null the next symbol should be considered too,
//however null productions have not yet been implemented.
//Returns a newly allocated vector (possibly with duplicates) that the caller must delete.
std::vector<Symbol*>* Parser::gramFollowSetAvoid(Symbol* token, std::vector<Symbol*>* avoidList) {
    std::vector<Symbol*>* follow = new std::vector<Symbol*>();

    //First, if the symbol is a terminal, then its follow set is the empty set.
    if (token->isTerminal()) {
        return(follow);
    }

    //If the token is in the avoid list, just return
    for (std::vector<Symbol*>::size_type i = 0; i < avoidList->size(); i++) {
        if (*token == *((*avoidList)[i]))
            return(follow);
    }
    //If not, we're about to process it, so add it to the avoid list
    avoidList->push_back(token);

    for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++) {
        std::vector<Symbol*> rightSide = loadedGrammer[i]->getRightSide();
        for (std::vector<Symbol*>::size_type j = 0; j < rightSide.size(); j++) {
            if (!(*token == *(rightSide[j])))
                continue;

            //If this is the first grammer rule, that is the goal rule, add $EOF$ and move on
            //NOTE(review): a new $EOF$ Symbol is allocated on every hit and nothing visible
            //here releases it - confirm ownership.
            if (i == 0) {
                follow->push_back(new Symbol("$EOF$", false));
                break;
            }

            //Temporary set produced for this occurrence, if any
            std::vector<Symbol*>* nested = NULL;
            if (j + 1 < rightSide.size()) {
                if (rightSide[j+1]->isTerminal())
                    follow->push_back(rightSide[j+1]);
                else
                    nested = firstSet(rightSide[j+1]);
            } else {
                nested = gramFollowSetAvoid(loadedGrammer[i]->getLeftSide(), avoidList);
            }

            if (nested != NULL) {
                follow->insert(follow->begin(), nested->begin(), nested->end());
                //Both callees return freshly allocated vectors; free ours once merged
                delete nested;
            }
        }
    }
    return(follow);
}
|
|
|
|
|
|
|
|
|
|
void Parser::printFollowSets() {
|
|
|
|
|
std::vector<Symbol*>* follow = NULL;
|
|
|
|
|
for (std::vector<Symbol*>::size_type i = 0; i < symbolIndexVec.size(); i++) {
|
2013-06-21 14:16:16 -04:00
|
|
|
follow = gramFollowSet(symbolIndexVec[i]);
|
2013-06-13 23:43:12 -04:00
|
|
|
std::cout << "Follow set of " << symbolIndexVec[i]->toString() << " is: ";
|
|
|
|
|
for (std::vector<Symbol*>::size_type j = 0; j < follow->size(); j++)
|
|
|
|
|
std::cout << (*follow)[j]->toString() << " ";
|
|
|
|
|
std::cout << std::endl;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-24 00:00:41 -04:00
|
|
|
void Parser::createStateSet() {
|
2013-05-24 13:24:33 -04:00
|
|
|
std::cout << "Begining creation of stateSet" << std::endl;
|
2013-05-26 22:12:47 -04:00
|
|
|
stateSets.push_back( new State(0, loadedGrammer[0]) );
|
2013-05-30 19:49:19 -04:00
|
|
|
//std::cout << "Begining for main set for loop" << std::endl;
|
2013-05-26 22:12:47 -04:00
|
|
|
for (std::vector< State* >::size_type i = 0; i < stateSets.size(); i++) {
|
2013-06-04 19:50:16 -04:00
|
|
|
//closure
|
2013-05-24 13:24:33 -04:00
|
|
|
closure(stateSets[i]);
|
2013-06-04 19:50:16 -04:00
|
|
|
//Add the new states
|
|
|
|
|
addStates(&stateSets, stateSets[i]);
|
2013-05-24 00:00:41 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-26 22:12:47 -04:00
|
|
|
//Closure of a state: for every rule in the state whose next symbol is the left side of
//a grammar rule, that grammar rule is appended to state->remaining unless the very same
//rule object is already part of the state.
//state->getTotal() is deliberately re-queried everywhere rather than cached, because rules
//are appended to state->remaining while the loops run.
void Parser::closure(State* state) {
    for (std::vector<ParseRule*>::size_type itemIdx = 0; itemIdx < state->getTotal()->size(); itemIdx++) {
        for (std::vector<ParseRule*>::size_type ruleIdx = 0; ruleIdx < loadedGrammer.size(); ruleIdx++) {
            //A NULL next symbol means the rule is completed: nothing to expand
            if ((*state->getTotal())[itemIdx]->getAtNextIndex() == NULL)
                continue;
            //Only grammar rules producing the next symbol are applicable
            if (!(*((*state->getTotal())[itemIdx]->getAtNextIndex()) == *(loadedGrammer[ruleIdx]->getLeftSide())))
                continue;

            //Check to make sure not already in (compared by pointer identity)
            bool present = false;
            for (std::vector<ParseRule*>::size_type k = 0; !present && k < state->getTotal()->size(); k++)
                present = ((*state->getTotal())[k] == loadedGrammer[ruleIdx]);

            if (!present)
                state->remaining.push_back(loadedGrammer[ruleIdx]);
        }
    }
}
|
|
|
|
|
|
|
|
|
|
//Adds state if it doesn't already exist.
|
2013-06-04 19:50:16 -04:00
|
|
|
//Expands one state: derives its successor states and fills in its row of the action table.
//  stateSets - the list of all states; successors not yet present are appended to it
//  state     - the state being expanded
//Step 1: every rule of the state is cloned and its pointer advanced. Advanced rules are
//        grouped into candidate states by the symbol they advanced past.
//        Completed rules get a REDUCE entry for every symbol in the grammar follow set of
//        their left side.
//Step 2: each candidate state is looked up in stateSets and appended if missing. Either
//        way a SHIFT entry to it is recorded for the symbol that was advanced past.
void Parser::addStates(std::vector< State* >* stateSets, State* state) {
    std::vector< State* > newStates;

    //For each rule in the state we already have
    std::vector<ParseRule*>* currStateTotal = state->getTotal();
    for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal->size(); i++) {
        //Clone the current rule
        //NOTE(review): the clone is dropped without being freed when it cannot be advanced
        //or is already contained in a candidate state - looks like a leak, confirm against
        //ParseRule's ownership rules before deleting it here.
        ParseRule* advancedRule = (*currStateTotal)[i]->clone();

        //Try to advance the pointer, if sucessful see if it is the correct next symbol
        if (advancedRule->advancePointer()) {
            //Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state
            //So search our new states to see if any of them use this advanced symbol as a base.
            //If so, add this rule to them.
            //If not, create it.
            bool symbolAlreadyInState = false;
            for (std::vector< State* >::size_type j = 0; j < newStates.size(); j++) {
                if (*(newStates[j]->basis[0]->getAtIndex()) == *(advancedRule->getAtIndex())) {
                    symbolAlreadyInState = true;
                    //So now check to see if this exact rule is in this state
                    if (!newStates[j]->containsRule(advancedRule))
                        newStates[j]->basis.push_back(advancedRule);
                    //We found a state with the same symbol, so stop searching
                    break;
                }
            }
            if (!symbolAlreadyInState) {
                State* newState = new State(stateSets->size()+newStates.size(),advancedRule);
                newStates.push_back(newState);
            }
        }

        //Also add any completed rules as reduces in the action table
        //This really only needs to be done for the state's basis, but we're already iterating through, so...
        if ((*currStateTotal)[i]->isAtEnd()) {
            //Iterates through follow set (not quite the correct follow set though. I believe it to be close to LALR(1))
            std::vector<Symbol*>* followSet = gramFollowSet((*currStateTotal)[i]->getLeftSide());
            for (std::vector<Symbol*>::size_type j = 0; j < followSet->size(); j++)
                addToTable(state, (*followSet)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i]));
            //gramFollowSet returns a freshly allocated vector; release it once consumed
            delete followSet;
        }
    }

    //Put all our new states in the set of states only if they're not already there.
    for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) {
        bool stateAlreadyInAllStates = false;
        Symbol* currStateSymbol = (*(newStates[i]->getBasis()))[0]->getAtIndex();
        for (std::vector< State * >::size_type j = 0; j < stateSets->size(); j++) {
            if (*(newStates[i]) == *((*stateSets)[j])) {
                stateAlreadyInAllStates = true;
                //If it does exist, we should add it as the shift/goto in the action table
                //NOTE(review): the duplicate candidate newStates[i] is not freed here.
                addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, j));
                std::cout << "State exists, is " << j << std::endl;
                break;
            }
        }
        if (!stateAlreadyInAllStates) {
            stateSets->push_back(newStates[i]);
            //If the state does not already exist, add it and add it as the shift/goto in the action table
            addToTable(state, currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1));
            std::cout << "State does not exist" << std::endl;
            std::cout << "State is " << newStates[i]->toString() << std::endl;
        }
    }
}
|
|
|
|
|
|
|
|
|
|
std::string Parser::stateSetToString() {
|
|
|
|
|
std::string concat = "";
|
2013-05-26 22:12:47 -04:00
|
|
|
for (std::vector< State *>::size_type i = 0; i < stateSets.size(); i++) {
|
|
|
|
|
concat += stateSets[i]->toString();
|
2013-05-24 00:00:41 -04:00
|
|
|
}
|
|
|
|
|
return concat;
|
|
|
|
|
}
|
|
|
|
|
|
2013-06-04 19:50:16 -04:00
|
|
|
//Records action in the parse table cell addressed by (fromState, tranSymbol).
//  - rows are indexed by the position of fromState in stateSets
//  - columns are indexed by the position of tranSymbol in symbolIndexVec; column 0 is
//    reserved for $EOF$ and unseen symbols get a new column
//  - rows and columns are grown (NULL-filled) on demand
//An empty cell takes the action. A cell already holding a different action is reported as
//a conflict on std::cout and left untouched.
//If fromState is not part of stateSets the request is reported on std::cerr and ignored.
//NOTE(review): when the action is not stored it is not freed here either - confirm who
//owns ParseAction objects before adding a delete.
void Parser::addToTable(State* fromState, Symbol* tranSymbol, ParseAction* action) {
    //If this is the first time we're adding to the table, add the EOF character
    if (symbolIndexVec.size() == 0)
        symbolIndexVec.push_back(new Symbol("$EOF$", false));

    //find what state num the from state is
    std::vector<State*>::size_type stateNum = 0;
    bool stateFound = false;
    for (std::vector<State*>::size_type i = 0; i < stateSets.size(); i++) {
        if (*(stateSets[i]) == *fromState) {
            stateNum = i;
            stateFound = true;
            break;
        }
    }
    //With a signed -1 sentinel the row-growing loop below would compare it as a huge
    //unsigned value and never terminate, so bail out explicitly instead.
    if (!stateFound) {
        std::cerr << "addToTable: state is not in the state set, ignoring " << action->toString() << std::endl;
        return;
    }

    //If state not in table, add rows up to and including it.
    while (stateNum >= table.size()) {
        table.push_back(new std::vector<ParseAction*>);
    }

    //find out what index this symbol is on; size() doubles as the "not found" value and
    //is exactly the index a newly appended symbol will get
    std::vector<Symbol*>::size_type symbolIndex = symbolIndexVec.size();
    for (std::vector<Symbol*>::size_type i = 0; i < symbolIndexVec.size(); i++) {
        if ( *(symbolIndexVec[i]) == *tranSymbol ) {
            symbolIndex = i;
            break;
        }
    }
    //If we've never done this symbol, add it
    if (symbolIndex == symbolIndexVec.size())
        symbolIndexVec.push_back(tranSymbol);

    //Pad the row so that the column exists
    std::vector<ParseAction*>* row = table[stateNum];
    while (symbolIndex >= row->size()) {
        row->push_back(NULL);
    }

    ParseAction* existing = (*row)[symbolIndex];
    if (existing == NULL) {
        //If this table slot is empty
        (*row)[symbolIndex] = action;
    } else if (*existing != *action) {
        //If the slot is not empty and does not contain ourself, then it is a conflict
        std::cout << "Conflict between old: " << existing->toString() << " and new: " << action->toString() << std::endl;
        //Don't overwrite
    }
}
|
|
|
|
|
|
|
|
|
|
std::string Parser::tableToString() {
|
|
|
|
|
std::string concat = "";
|
|
|
|
|
for (std::vector<Symbol*>::size_type i = 0; i < symbolIndexVec.size(); i++)
|
|
|
|
|
concat += "\t" + symbolIndexVec[i]->toString();
|
|
|
|
|
concat += "\n";
|
|
|
|
|
|
|
|
|
|
for (std::vector< std::vector< ParseRule* > >::size_type i = 0; i < table.size(); i++) {
|
|
|
|
|
concat += intToString(i) + "\t";
|
|
|
|
|
for (std::vector< ParseRule* >::size_type j = 0; j < table[i]->size(); j++) {
|
|
|
|
|
if ( (*(table[i]))[j] != NULL)
|
|
|
|
|
concat += (*(table[i]))[j]->toString() + "\t";
|
|
|
|
|
else
|
|
|
|
|
concat += "NULL\t";
|
2013-05-29 20:43:35 -04:00
|
|
|
}
|
2013-06-04 19:50:16 -04:00
|
|
|
concat += "\n";
|
|
|
|
|
}
|
|
|
|
|
return(concat);
|
|
|
|
|
}
|
2013-05-30 02:12:34 -04:00
|
|
|
|
2013-06-04 19:50:16 -04:00
|
|
|
//Looks up the action for (state, token) in the parse table.
//Returns:
//  - a new ACCEPT action when state is 1 and token is the symbol in column 0 ($EOF$)
//  - a new REJECT action when the token is unknown, the state has no row, the row is too
//    short for the token's column, or the cell is empty
//  - otherwise the stored table entry itself (not a copy)
//NOTE(review): ACCEPT/REJECT results are allocated per call while other results point into
//the table, so callers cannot tell which ones they may free - confirm intended ownership.
ParseAction* Parser::getTable(int state, Symbol* token) {
    int symbolIndex = -1;
    for (std::vector<Symbol*>::size_type i = 0; i < symbolIndexVec.size(); i++) {
        if ( *(symbolIndexVec[i]) == *token) {
            symbolIndex = i;
            break;
        }
    }

    //This is the accepting state, as it is the 1th's state's reduction on EOF, which is 0 in the symbolIndexVec
    //(This assumes singular goal assignment, a simplification for now)
    if (state == 1 && symbolIndex == 0)
        return(new ParseAction(ParseAction::ACCEPT));

    //A symbol that never made it into the table cannot have an action
    if (symbolIndex < 0)
        return(new ParseAction(ParseAction::REJECT));

    //A state without a row has no actions either; indexing table[state] would be out of bounds
    if (state < 0 || static_cast<std::vector<ParseAction*>::size_type>(state) >= table.size())
        return(new ParseAction(ParseAction::REJECT));

    //If outside the symbol range of this state (same as NULL), reject
    std::vector<ParseAction*>* row = table[state];
    if ( static_cast<std::vector<ParseAction*>::size_type>(symbolIndex) >= row->size() )
        return(new ParseAction(ParseAction::REJECT));

    ParseAction* action = (*row)[symbolIndex];
    //If null, reject. (this is a space with no other action)
    if (action == NULL)
        return(new ParseAction(ParseAction::REJECT));

    //Otherwise, we have something, so return it
    return (action);
}
|
|
|
|
|
|
2013-06-13 14:25:10 -04:00
|
|
|
//Runs the table-driven shift/reduce parse over the tokens supplied by lexer.
//Two stacks are kept in step: stateStack (state numbers) and symbolStack (the symbols that
//led to them, starting with an "INVALID" placeholder under state 0).
//  SHIFT  - push the token and the target state, fetch the next token
//  REDUCE - pop one state/symbol per right-side symbol of the rule, build a tree node for
//           the rule's left side from the popped symbols, push it, then push the goto
//           state looked up for (state now on top, left side)
//  ACCEPT - return the sub tree of the symbol on top of the symbol stack
//  REJECT - return NULL
//Progress is traced on std::cout.
//Returns NULL as well when a reduce finds no goto entry for the reduced symbol.
NodeTree* Parser::parseInput(Lexer* lexer) {
    Symbol* token = lexer->next();
    ParseAction* action;

    stateStack.push(0);
    symbolStack.push(new Symbol("INVALID", false));

    while (true) {
        std::cout << "In state: " << intToString(stateStack.top()) << std::endl;
        action = getTable(stateStack.top(), token);
        std::cout << "Doing ParseAction: " << action->toString() << std::endl;
        switch (action->action) {
            case ParseAction::REDUCE:
            {
                std::cout << "Reduce by " << action->reduceRule->toString() << std::endl;

                int rightSideLength = action->reduceRule->getRightSide().size();
                //Keep track of symbols popped for parse tree
                std::vector<Symbol*> poppedSymbols;
                for (int i = 0; i < rightSideLength; i++) {
                    poppedSymbols.push_back(symbolStack.top());
                    stateStack.pop();
                    symbolStack.pop();
                }
                std::reverse(poppedSymbols.begin(), poppedSymbols.end()); //To put in order

                //Assign the new tree to the new Symbol
                Symbol* newSymbol = action->reduceRule->getLeftSide()->clone();
                newSymbol->setSubTree(reduceTreeCombine(newSymbol, poppedSymbols));
                symbolStack.push(newSymbol);
                std::cout << "top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl;

                //Goto: only a SHIFT entry carries a usable target state. Anything else
                //(typically the REJECT returned for an empty cell) must not be pushed.
                ParseAction* gotoAction = getTable(stateStack.top(), symbolStack.top());
                if (gotoAction->action != ParseAction::SHIFT) {
                    std::cout << "REJECTED!" << std::endl;
                    std::cout << "No goto entry for " << symbolStack.top()->toString() << " in state " << intToString(stateStack.top()) << std::endl;
                    return(NULL);
                }
                stateStack.push(gotoAction->shiftState);

                std::cout << "Reduced, now condition is" << std::endl;
                std::cout << "top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl;
                break;
            }
            case ParseAction::SHIFT:
                std::cout << "Shift " << token->toString() << std::endl;

                symbolStack.push(token);
                token = lexer->next();
                stateStack.push(action->shiftState);
                break;
            case ParseAction::ACCEPT:
                std::cout << "ACCEPTED!" << std::endl;
                return(symbolStack.top()->getSubTree());
            case ParseAction::REJECT:
                std::cout << "REJECTED!" << std::endl;
                std::cout << "REJECTED Symbol was " << token->toString() << std::endl;
                return(NULL);
        }
    }
}
|
|
|
|
|
|
2013-05-30 19:49:19 -04:00
|
|
|
//Builds the tree node for a reduction: a new node named after newSymbol whose children
//are, in order, a new leaf for each terminal in symbols and the existing sub tree of
//each non-terminal.
NodeTree* Parser::reduceTreeCombine(Symbol* newSymbol, std::vector<Symbol*> &symbols) {
    NodeTree* parent = new NodeTree(newSymbol->toString());
    for (std::vector<Symbol*>::iterator child = symbols.begin(); child != symbols.end(); ++child) {
        if (!(*child)->isTerminal()) {
            //Non-terminals already carry the tree built when they were reduced
            parent->addChild((*child)->getSubTree());
        } else {
            //Terminals become fresh leaf nodes
            parent->addChild(new NodeTree((*child)->toString()));
        }
    }
    return(parent);
}
|
|
|
|
|
|
2013-05-20 19:34:15 -04:00
|
|
|
std::string Parser::grammerToString() {
|
|
|
|
|
//Iterate through the vector, adding string representation of each grammer rule
|
|
|
|
|
std::cout << "About to toString\n";
|
|
|
|
|
std::string concat = "";
|
|
|
|
|
for (int i = 0; i < loadedGrammer.size(); i++) {
|
2013-05-20 22:59:57 -04:00
|
|
|
concat += loadedGrammer[i]->toString() + "\n";
|
2013-05-20 19:34:15 -04:00
|
|
|
}
|
|
|
|
|
return(concat);
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-20 22:59:57 -04:00
|
|
|
std::string Parser::grammerToDOT() {
|
|
|
|
|
//Iterate through the vector, adding DOT representation of each grammer rule
|
2013-05-30 19:49:19 -04:00
|
|
|
//std::cout << "About to DOT export\n";
|
2013-05-20 22:59:57 -04:00
|
|
|
std::string concat = "";
|
|
|
|
|
for (int i = 0; i < loadedGrammer.size(); i++) {
|
|
|
|
|
concat += loadedGrammer[i]->toDOT();
|
|
|
|
|
}
|
|
|
|
|
return("digraph Kraken_Grammer { \n" + concat + "}");
|
|
|
|
|
}
|
|
|
|
|
|