2013-05-20 19:34:15 -04:00
# include "Parser.h"
//Constructs a Parser and allocates its two special symbols.
//Both are created with the terminal flag set to true and are released again in ~Parser().
Parser::Parser() {
    //Marker used as the lookahead of the goal rule and as column 0 of the parse table
    this->EOFSymbol = new Symbol(" $EOF$ ", true);
    //Marker appended to the right side of rules that would otherwise be empty
    this->nullSymbol = new Symbol(" $NULL$ ", true);
}
//Releases the two special symbols allocated by the constructor.
//NOTE(review): nothing else allocated by this class in this file (symbols, rules, states,
//table rows) is freed here - confirm whether that is intentional.
Parser::~Parser() {
    delete this->EOFSymbol;
    delete this->nullSymbol;
}
//Returns the Symbol registered under symbolString, creating and registering it first if needed.
//isTerminal is only used when a new Symbol has to be created; an existing entry is returned as is.
Symbol* Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) {
    //Already known: hand back the stored instance
    if (symbols.find(symbolString) != symbols.end())
        return symbols[symbolString];

    //First time we see this name: create it and remember it in the symbol map
    Symbol* created = new Symbol(symbolString, isTerminal);
    symbols[symbolString] = created;
    return created;
}
//Reads a grammar description from grammerInputString and appends one ParseRule per production to loadedGrammer.
//The text is consumed word by word through reader. Each rule is read as:
//  a left-hand symbol, one word that is skipped (the =), right-hand symbols, and a terminating ; word.
//A | word closes the current production and starts another one with the same left-hand symbol.
//Right-hand words whose first character matches the quote literal are stripped of their first and last
//character, registered with the lexer via addRegEx, and added as terminals; all other words are non-terminals.
//A production that ends up with no right-hand symbols gets nullSymbol as its only right-hand symbol.
//When loading is done, a message and every loaded rule are printed to std::cout.
//NOTE(review): the string/character literals compared against below appear padded with spaces in this
//copy of the file (including a multi-character character literal) - confirm them against the real source.
void Parser : : loadGrammer ( std : : string grammerInputString ) {
reader . setString ( grammerInputString ) ;
std : : string currToken = reader . word ( ) ;
//Outer loop: one iteration per left-hand symbol, until the reader returns the end-of-input word
while ( currToken ! = " " ) {
//Load the left of the rule
ParseRule * currentRule = new ParseRule ( ) ;
Symbol * leftSide = getOrAddSymbol ( currToken , false ) ; //Left handle is never a terminal
currentRule - > setLeftHandle ( leftSide ) ;
reader . word ( ) ; //Remove the =
//Add the right side, adding new Symbols to symbol map.
currToken = reader . word ( ) ;
//NOTE(review): this loop only stops on the terminating word; there is no end-of-input check here,
//so input that ends before the terminator presumably never leaves the loop - verify reader.word() behaviour
while ( currToken ! = " ; " ) {
2013-07-02 01:47:42 -04:00
if ( currToken [ 0 ] = = ' \" ' ) {
//Remove the quotes
currToken = currToken . substr ( 1 , currToken . length ( ) - 2 ) ;
//The unquoted text is handed to the lexer as a regular expression
lexer . addRegEx ( currToken ) ;
currentRule - > appendToRight ( getOrAddSymbol ( currToken , true ) ) ; //If first character is a ", then is a terminal
} else {
currentRule - > appendToRight ( getOrAddSymbol ( currToken , false ) ) ;
}
2013-05-20 19:34:15 -04:00
currToken = reader . word ( ) ;
//If there are multiple endings to this rule, finish this rule and start a new one with same left handle
if ( currToken = = " | " ) {
2013-07-09 02:45:59 -04:00
//If we haven't added anything, that means that this is a null rule
if ( currentRule - > getRightSide ( ) . size ( ) = = 0 )
currentRule - > appendToRight ( nullSymbol ) ;
2013-05-20 19:34:15 -04:00
loadedGrammer . push_back ( currentRule ) ;
currentRule = new ParseRule ( ) ;
currentRule - > setLeftHandle ( leftSide ) ;
//Skip past the alternative separator to the first word of the next production
currToken = reader . word ( ) ;
}
}
//Add new rule to grammar
2013-07-09 02:45:59 -04:00
//If we haven't added anything, that means that this is a null rule
if ( currentRule - > getRightSide ( ) . size ( ) = = 0 )
currentRule - > appendToRight ( nullSymbol ) ;
2013-05-20 19:34:15 -04:00
loadedGrammer . push_back ( currentRule ) ;
//Get next token
currToken = reader . word ( ) ;
}
std : : cout < < " Parsed! \n " ;
2013-07-09 02:45:59 -04:00
//Dump every rule loaded so far (including rules from earlier calls, since loadedGrammer is only appended to)
for ( std : : vector < ParseRule * > : : size_type i = 0 ; i < loadedGrammer . size ( ) ; i + + )
std : : cout < < loadedGrammer [ i ] - > toString ( ) < < std : : endl ;
2013-05-20 19:34:15 -04:00
}
2013-06-13 19:11:31 -04:00
std : : vector < Symbol * > * Parser : : firstSet ( Symbol * token ) {
2013-07-10 23:50:53 -04:00
//std::cout << "Simple first set for " << token->toString() << std::endl;
std : : vector < Symbol * > avoidList ;
return firstSet ( token , avoidList ) ;
}
std : : vector < Symbol * > * Parser : : firstSet ( Symbol * token , std : : vector < Symbol * > & avoidList ) {
//If we've already done this token, don't do it again
for ( std : : vector < Symbol * > : : size_type i = 0 ; i < avoidList . size ( ) ; i + + )
if ( * ( avoidList [ i ] ) = = * token ) {
return new std : : vector < Symbol * > ( ) ;
//std::cout << "Avoiding firstSet for " << token->toString() << std::endl;
}
avoidList . push_back ( token ) ;
//std::cout << "Cpx first set for " << token->toString() << std::endl;
//std::cout << "Doing first set for " << token->toString() << std::endl;
2013-06-13 19:11:31 -04:00
std : : vector < Symbol * > * first = new std : : vector < Symbol * > ( ) ;
//First, if the symbol is a terminal, than it's first set is just itself.
if ( token - > isTerminal ( ) ) {
first - > push_back ( token ) ;
return ( first ) ;
}
//Otherwise....
2013-07-09 02:45:59 -04:00
//Ok, to make a first set, go through the grammer, if the token it's left side, add it's production's first token's first set.
//If that one includes mull, do the next one too (if it exists).
2013-06-13 19:11:31 -04:00
Symbol * rightToken = NULL ;
std : : vector < Symbol * > * recursiveFirstSet = NULL ;
for ( std : : vector < ParseRule * > : : size_type i = 0 ; i < loadedGrammer . size ( ) ; i + + ) {
if ( * token = = * ( loadedGrammer [ i ] - > getLeftSide ( ) ) ) {
2013-07-09 02:45:59 -04:00
//Loop through the rule adding first sets for each token if the previous token contained NULL
bool recFirstSetHasNull = false ;
int j = 0 ;
do {
rightToken = loadedGrammer [ i ] - > getRightSide ( ) [ j ] ; //Get token of the right side of this rule
if ( rightToken - > isTerminal ( ) ) {
recursiveFirstSet = new std : : vector < Symbol * > ( ) ;
recursiveFirstSet - > push_back ( rightToken ) ;
} else {
//Add the entire set
2013-07-10 23:50:53 -04:00
recursiveFirstSet = firstSet ( rightToken , avoidList ) ;
2013-07-09 02:45:59 -04:00
}
2013-06-13 19:11:31 -04:00
first - > insert ( first - > end ( ) , recursiveFirstSet - > begin ( ) , recursiveFirstSet - > end ( ) ) ;
2013-07-09 02:45:59 -04:00
//Check to see if the current recursiveFirstSet contains NULL, if so, then go through again with the next token. (if there is one)
recFirstSetHasNull = false ;
for ( std : : vector < Symbol * > : : size_type k = 0 ; k < recursiveFirstSet - > size ( ) ; k + + ) {
if ( ( * ( * recursiveFirstSet ) [ j ] ) = = * nullSymbol ) {
recFirstSetHasNull = true ;
}
}
2013-07-10 23:50:53 -04:00
delete recursiveFirstSet ;
2013-07-09 02:45:59 -04:00
j + + ;
} while ( recFirstSetHasNull & & loadedGrammer [ i ] - > getRightSide ( ) . size ( ) > j ) ;
2013-06-13 19:11:31 -04:00
}
}
return ( first ) ;
}
void Parser : : printFirstSets ( ) {
std : : vector < Symbol * > * first = NULL ;
for ( std : : vector < Symbol * > : : size_type i = 0 ; i < symbolIndexVec . size ( ) ; i + + ) {
first = firstSet ( symbolIndexVec [ i ] ) ;
std : : cout < < " First set of " < < symbolIndexVec [ i ] - > toString ( ) < < " is: " ;
for ( std : : vector < Symbol * > : : size_type j = 0 ; j < first - > size ( ) ; j + + )
std : : cout < < ( * first ) [ j ] - > toString ( ) < < " " ;
std : : cout < < std : : endl ;
}
}
2013-05-24 00:00:41 -04:00
void Parser : : createStateSet ( ) {
2013-05-24 13:24:33 -04:00
std : : cout < < " Begining creation of stateSet " < < std : : endl ;
2013-06-23 05:06:38 -04:00
//First state has no parents
2013-06-26 14:27:28 -04:00
//Set the first state's basis to be the goal rule with lookahead EOF
ParseRule * goalRule = loadedGrammer [ 0 ] - > clone ( ) ;
std : : vector < Symbol * > * goalRuleLookahead = new std : : vector < Symbol * > ( ) ;
2013-07-09 02:45:59 -04:00
goalRuleLookahead - > push_back ( EOFSymbol ) ;
2013-06-26 14:27:28 -04:00
goalRule - > setLookahead ( goalRuleLookahead ) ;
stateSets . push_back ( new State ( 0 , goalRule ) ) ;
2013-05-30 19:49:19 -04:00
//std::cout << "Begining for main set for loop" << std::endl;
2013-05-26 22:12:47 -04:00
for ( std : : vector < State * > : : size_type i = 0 ; i < stateSets . size ( ) ; i + + ) {
2013-06-04 19:50:16 -04:00
//closure
2013-05-24 13:24:33 -04:00
closure ( stateSets [ i ] ) ;
2013-06-04 19:50:16 -04:00
//Add the new states
addStates ( & stateSets , stateSets [ i ] ) ;
2013-05-24 00:00:41 -04:00
}
}
2013-07-09 02:45:59 -04:00
//Return the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
std : : vector < Symbol * > * Parser : : incrementiveFollowSet ( ParseRule * rule ) {
//Advance the pointer past the current Symbol (the one we want the followset for) to the next symbol (which might be in our follow set, or might be the end)
rule = rule - > clone ( ) ;
rule - > advancePointer ( ) ;
//Get the first set of the next Symbol. If it contains nullSymbol, keep doing for the next one
std : : vector < Symbol * > * followSet = new std : : vector < Symbol * > ( ) ;
std : : vector < Symbol * > * symbolFirstSet ;
bool symbolFirstSetHasNull = true ;
while ( symbolFirstSetHasNull & & ! rule - > isAtEnd ( ) ) {
symbolFirstSetHasNull = false ;
symbolFirstSet = firstSet ( rule - > getAtNextIndex ( ) ) ;
for ( std : : vector < Symbol * > : : size_type i = 0 ; i < symbolFirstSet - > size ( ) ; i + + ) {
if ( * ( ( * symbolFirstSet ) [ i ] ) = = * nullSymbol ) {
symbolFirstSetHasNull = true ;
2013-07-10 23:50:53 -04:00
symbolFirstSet - > erase ( symbolFirstSet - > begin ( ) + i ) ;
2013-07-09 02:45:59 -04:00
break ;
}
}
followSet - > insert ( followSet - > end ( ) , symbolFirstSet - > begin ( ) , symbolFirstSet - > end ( ) ) ;
delete symbolFirstSet ;
rule - > advancePointer ( ) ;
}
if ( rule - > isAtEnd ( ) ) {
symbolFirstSet = rule - > getLookahead ( ) ;
followSet - > insert ( followSet - > end ( ) , symbolFirstSet - > begin ( ) , symbolFirstSet - > end ( ) ) ;
}
2013-07-10 23:50:53 -04:00
std : : vector < Symbol * > * followSetReturn = new std : : vector < Symbol * > ( ) ;
for ( std : : vector < Symbol * > : : size_type i = 0 ; i < followSet - > size ( ) ; i + + ) {
bool alreadyIn = false ;
for ( std : : vector < Symbol * > : : size_type j = 0 ; j < followSetReturn - > size ( ) ; j + + )
if ( * ( ( * followSet ) [ i ] ) = = * ( ( * followSetReturn ) [ j ] ) )
alreadyIn = true ;
if ( ! alreadyIn )
followSetReturn - > push_back ( ( * followSet ) [ i ] ) ;
}
delete followSet ;
return followSetReturn ;
2013-07-09 02:45:59 -04:00
}
2013-05-26 22:12:47 -04:00
void Parser : : closure ( State * state ) {
2013-05-24 00:00:41 -04:00
//Add all the applicable rules.
2013-05-30 19:49:19 -04:00
//std::cout << "Closure on " << state->toString() << " is" << std::endl;
2013-06-26 14:27:28 -04:00
std : : vector < ParseRule * > * stateTotal = state - > getTotal ( ) ;
for ( std : : vector < ParseRule * > : : size_type i = 0 ; i < stateTotal - > size ( ) ; i + + ) {
ParseRule * currentStateRule = ( * stateTotal ) [ i ] ;
2013-05-24 13:24:33 -04:00
for ( std : : vector < ParseRule * > : : size_type j = 0 ; j < loadedGrammer . size ( ) ; j + + ) {
2013-05-26 22:12:47 -04:00
//If the current symbol in the rule is not null (rule completed) and it equals a grammer's left side
2013-06-26 14:27:28 -04:00
ParseRule * currentGramRule = loadedGrammer [ j ] - > clone ( ) ;
if ( ! currentStateRule - > isAtEnd ( ) & & * ( currentStateRule - > getAtNextIndex ( ) ) = = * ( currentGramRule - > getLeftSide ( ) ) ) {
//std::cout << (*stateTotal)[i]->getAtNextIndex()->toString() << " has an applicable production " << loadedGrammer[j]->toString() << std::endl;
//Now, add the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
2013-07-10 23:50:53 -04:00
//std::cout << "Setting lookahead for " << currentGramRule->toString() << " in state " << state->toString() << std::endl;
2013-06-26 14:27:28 -04:00
currentGramRule - > setLookahead ( incrementiveFollowSet ( currentStateRule ) ) ;
2013-05-24 00:00:41 -04:00
//Check to make sure not already in
bool isAlreadyInState = false ;
2013-06-26 14:27:28 -04:00
for ( std : : vector < ParseRule * > : : size_type k = 0 ; k < stateTotal - > size ( ) ; k + + ) {
if ( * ( ( * stateTotal ) [ k ] ) = = * currentGramRule ) {
2013-05-24 00:00:41 -04:00
isAlreadyInState = true ;
break ;
}
}
2013-06-26 14:27:28 -04:00
if ( ! isAlreadyInState ) {
state - > remaining . push_back ( currentGramRule ) ;
stateTotal = state - > getTotal ( ) ;
}
2013-05-24 00:00:41 -04:00
}
}
}
2013-05-30 19:49:19 -04:00
//std::cout << state->toString() << std::endl;
2013-05-24 00:00:41 -04:00
}
//Adds state if it doesn't already exist.
2013-06-04 19:50:16 -04:00
void Parser : : addStates ( std : : vector < State * > * stateSets , State * state ) {
2013-05-26 22:12:47 -04:00
std : : vector < State * > newStates ;
2013-05-24 00:00:41 -04:00
//For each rule in the state we already have
2013-06-04 19:50:16 -04:00
std : : vector < ParseRule * > * currStateTotal = state - > getTotal ( ) ;
for ( std : : vector < ParseRule * > : : size_type i = 0 ; i < currStateTotal - > size ( ) ; i + + ) {
2013-05-24 00:00:41 -04:00
//Clone the current rule
2013-06-04 19:50:16 -04:00
ParseRule * advancedRule = ( * currStateTotal ) [ i ] - > clone ( ) ;
//Try to advance the pointer, if sucessful see if it is the correct next symbol
2013-05-24 00:00:41 -04:00
if ( advancedRule - > advancePointer ( ) ) {
2013-05-26 22:12:47 -04:00
//Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state
//So search our new states to see if any of them use this advanced symbol as a base.
//If so, add this rule to them.
//If not, create it.
2013-05-24 00:00:41 -04:00
bool symbolAlreadyInState = false ;
2013-05-26 22:12:47 -04:00
for ( std : : vector < State * > : : size_type j = 0 ; j < newStates . size ( ) ; j + + ) {
if ( * ( newStates [ j ] - > basis [ 0 ] - > getAtIndex ( ) ) = = * ( advancedRule - > getAtIndex ( ) ) ) {
2013-05-24 00:00:41 -04:00
symbolAlreadyInState = true ;
//So now check to see if this exact rule is in this state
2013-06-04 19:50:16 -04:00
if ( ! newStates [ j ] - > containsRule ( advancedRule ) )
2013-05-26 22:12:47 -04:00
newStates [ j ] - > basis . push_back ( advancedRule ) ;
2013-05-24 00:00:41 -04:00
//We found a state with the same symbol, so stop searching
break ;
}
}
if ( ! symbolAlreadyInState ) {
2013-06-23 05:06:38 -04:00
State * newState = new State ( stateSets - > size ( ) + newStates . size ( ) , advancedRule , state ) ;
2013-05-24 00:00:41 -04:00
newStates . push_back ( newState ) ;
}
}
2013-06-04 19:50:16 -04:00
//Also add any completed rules as reduces in the action table
//See if reduce
//Also, this really only needs to be done for the state's basis, but we're already iterating through, so...
2013-07-09 02:45:59 -04:00
std : : vector < Symbol * > * lookahead = ( * currStateTotal ) [ i ] - > getLookahead ( ) ;
2013-06-04 19:50:16 -04:00
if ( ( * currStateTotal ) [ i ] - > isAtEnd ( ) ) {
2013-06-26 14:27:28 -04:00
for ( std : : vector < Symbol * > : : size_type j = 0 ; j < lookahead - > size ( ) ; j + + )
addToTable ( state , ( * lookahead ) [ j ] , new ParseAction ( ParseAction : : REDUCE , ( * currStateTotal ) [ i ] ) ) ;
2013-07-09 02:45:59 -04:00
} else if ( * ( ( * currStateTotal ) [ i ] - > getAtNextIndex ( ) ) = = * nullSymbol ) {
//If is a rule that produces only NULL, add in the approprite reduction, but use a new rule with a right side of length 0. (so we don't pop off stack)
ParseRule * nullRule = ( * currStateTotal ) [ i ] - > clone ( ) ;
nullRule - > setRightSide ( * new std : : vector < Symbol * > ( ) ) ;
for ( std : : vector < Symbol * > : : size_type j = 0 ; j < lookahead - > size ( ) ; j + + )
addToTable ( state , ( * lookahead ) [ j ] , new ParseAction ( ParseAction : : REDUCE , nullRule ) ) ;
2013-06-04 19:50:16 -04:00
}
2013-05-24 00:00:41 -04:00
}
2013-05-26 22:12:47 -04:00
//Put all our new states in the set of states only if they're not already there.
bool stateAlreadyInAllStates = false ;
2013-06-04 19:50:16 -04:00
Symbol * currStateSymbol ;
2013-05-26 22:12:47 -04:00
for ( std : : vector < State * > : : size_type i = 0 ; i < newStates . size ( ) ; i + + ) {
2013-06-21 14:16:16 -04:00
stateAlreadyInAllStates = false ;
2013-06-04 19:50:16 -04:00
currStateSymbol = ( * ( newStates [ i ] - > getBasis ( ) ) ) [ 0 ] - > getAtIndex ( ) ;
2013-05-26 22:12:47 -04:00
for ( std : : vector < State * > : : size_type j = 0 ; j < stateSets - > size ( ) ; j + + ) {
2013-06-21 14:46:15 -04:00
if ( newStates [ i ] - > basisEquals ( * ( ( * stateSets ) [ j ] ) ) ) {
2013-05-26 22:12:47 -04:00
stateAlreadyInAllStates = true ;
2013-06-04 19:50:16 -04:00
//If it does exist, we should add it as the shift/goto in the action table
2013-06-23 05:06:38 -04:00
( * stateSets ) [ j ] - > addParents ( newStates [ i ] - > getParents ( ) ) ;
2013-06-21 14:46:15 -04:00
addToTable ( state , currStateSymbol , new ParseAction ( ParseAction : : SHIFT , j ) ) ;
2013-06-04 19:50:16 -04:00
break ;
2013-07-09 02:45:59 -04:00
}
2013-05-26 22:12:47 -04:00
}
if ( ! stateAlreadyInAllStates ) {
2013-06-04 19:50:16 -04:00
//If the state does not already exist, add it and add it as the shift/goto in the action table
2013-07-09 02:45:59 -04:00
stateSets - > push_back ( newStates [ i ] ) ;
2013-06-21 14:46:15 -04:00
addToTable ( state , currStateSymbol , new ParseAction ( ParseAction : : SHIFT , stateSets - > size ( ) - 1 ) ) ;
2013-05-26 22:12:47 -04:00
}
2013-05-24 00:00:41 -04:00
}
}
std : : string Parser : : stateSetToString ( ) {
std : : string concat = " " ;
2013-05-26 22:12:47 -04:00
for ( std : : vector < State * > : : size_type i = 0 ; i < stateSets . size ( ) ; i + + ) {
concat + = stateSets [ i ] - > toString ( ) ;
2013-05-24 00:00:41 -04:00
}
return concat ;
}
2013-06-04 19:50:16 -04:00
void Parser : : addToTable ( State * fromState , Symbol * tranSymbol , ParseAction * action ) {
2013-06-13 14:25:10 -04:00
//If this is the first time we're adding to the table, add the EOF character
if ( symbolIndexVec . size ( ) = = 0 )
2013-07-09 02:45:59 -04:00
symbolIndexVec . push_back ( EOFSymbol ) ;
2013-06-13 14:25:10 -04:00
2013-06-04 19:50:16 -04:00
//find what state num the from state is
int stateNum = - 1 ;
for ( std : : vector < State * > : : size_type i = 0 ; i < stateSets . size ( ) ; i + + ) {
if ( * ( stateSets [ i ] ) = = * fromState ) {
stateNum = i ;
break ;
2013-05-29 20:43:35 -04:00
}
}
2013-05-23 01:35:54 -04:00
2013-06-04 19:50:16 -04:00
//std::cout << "stateNum is " << stateNum << std::endl;
2013-05-30 02:12:34 -04:00
2013-06-04 19:50:16 -04:00
//If state not in table, add up to and it.
//std::cout << "table size is " << table.size() <<std::endl;
while ( stateNum > = table . size ( ) ) {
//std::cout << "Pushing back table" << std::endl;
table . push_back ( new std : : vector < ParseAction * > ) ;
}
2013-05-30 02:12:34 -04:00
2013-06-04 19:50:16 -04:00
//find out what index this symbol is on
int symbolIndex = - 1 ;
for ( std : : vector < Symbol * > : : size_type i = 0 ; i < symbolIndexVec . size ( ) ; i + + ) {
if ( * ( symbolIndexVec [ i ] ) = = * tranSymbol ) {
//Has been found
symbolIndex = i ;
break ;
2013-05-29 20:43:35 -04:00
}
2013-06-04 19:50:16 -04:00
}
//std::cout << "symbolIndex is " << symbolIndex << std::endl;
2013-05-30 02:12:34 -04:00
2013-06-04 19:50:16 -04:00
//If we've never done this symbol, add it
if ( symbolIndex < 0 ) {
// std::cout << "pushing back symbolIndexVec" <<std::endl;
symbolIndex = symbolIndexVec . size ( ) ;
symbolIndexVec . push_back ( tranSymbol ) ;
}
//std::cout << "symbolIndex is " << symbolIndex << " which is " << symbolIndexVec[symbolIndex]->toString() << std::endl;
//std::cout << table[stateNum] << " ";
while ( symbolIndex > = table [ stateNum ] - > size ( ) ) {
table [ stateNum ] - > push_back ( NULL ) ;
}
//If this table slot is empty
//std::cout << "table[stateNum] is " << table[stateNum] << std::endl;
//std::cout << "blank is " << (*(table[stateNum]))[symbolIndex] << std::endl;
if ( ( * ( table [ stateNum ] ) ) [ symbolIndex ] = = NULL ) {
2013-06-21 14:16:16 -04:00
//std::cout << "Null, adding " << action->toString() << std::endl;
2013-06-04 19:50:16 -04:00
( * ( table [ stateNum ] ) ) [ symbolIndex ] = action ;
}
//If the slot is not empty and does not contain ourself, then it is a conflict
2013-07-10 23:50:53 -04:00
else if ( ! ( * ( table [ stateNum ] ) ) [ symbolIndex ] - > equalsExceptLookahead ( * action ) ) {
2013-06-21 14:16:16 -04:00
//std::cout << "not Null!" << std::endl;
2013-06-21 14:46:15 -04:00
std : : cout < < " State: " < < stateNum < < " Conflict between old: " < < ( * ( table [ stateNum ] ) ) [ symbolIndex ] - > toString ( ) < < " and new: " < < action - > toString ( ) < < std : : endl ;
2013-06-04 19:50:16 -04:00
//Don't overwrite
//(*(table[stateNum]))[symbolIndex] = action;
}
}
std : : string Parser : : tableToString ( ) {
std : : string concat = " " ;
for ( std : : vector < Symbol * > : : size_type i = 0 ; i < symbolIndexVec . size ( ) ; i + + )
concat + = " \t " + symbolIndexVec [ i ] - > toString ( ) ;
concat + = " \n " ;
for ( std : : vector < std : : vector < ParseRule * > > : : size_type i = 0 ; i < table . size ( ) ; i + + ) {
concat + = intToString ( i ) + " \t " ;
for ( std : : vector < ParseRule * > : : size_type j = 0 ; j < table [ i ] - > size ( ) ; j + + ) {
if ( ( * ( table [ i ] ) ) [ j ] ! = NULL )
concat + = ( * ( table [ i ] ) ) [ j ] - > toString ( ) + " \t " ;
else
concat + = " NULL \t " ;
2013-05-29 20:43:35 -04:00
}
2013-06-04 19:50:16 -04:00
concat + = " \n " ;
}
return ( concat ) ;
}
2013-05-30 02:12:34 -04:00
2013-06-04 19:50:16 -04:00
//Looks up the action for token in the given state.
//  state - row of the parse table (a state number)
//  token - symbol to look up; its column is its position in symbolIndexVec
//Returns the stored action, a new ACCEPT action for state 1 on the symbol in column 0 (EOF),
//or a new REJECT action when there is no entry (unknown symbol, row/column out of range, or empty cell).
//NOTE(review): stored actions belong to the table while ACCEPT/REJECT results are freshly allocated,
//so a caller cannot tell whether it may delete the result - confirm the intended ownership.
ParseAction* Parser::getTable(int state, Symbol* token) {
    int symbolIndex = -1;
    for (std::vector<Symbol*>::size_type i = 0; i < symbolIndexVec.size(); i++) {
        if (*(symbolIndexVec[i]) == *token) {
            symbolIndex = i;
            break;
        }
    }

    //This is the accepting state, as it is the 1th's state's reduction on EOF, which is 0 in the symbolIndexVec
    //(This assumes singular goal assignment, a simplification for now)
    if (state == 1 && symbolIndex == 0)
        return new ParseAction(ParseAction::ACCEPT);

    //A symbol that never got a column cannot have an action
    if (symbolIndex < 0)
        return new ParseAction(ParseAction::REJECT);

    //A state without a row has no actions at all (rows are only created when something is added to them)
    if (state < 0 || static_cast<std::vector<State*>::size_type>(state) >= table.size())
        return new ParseAction(ParseAction::REJECT);

    //If outside the symbol range of this state (same as NULL), reject
    std::vector<ParseAction*>* row = table[state];
    if (static_cast<std::vector<ParseAction*>::size_type>(symbolIndex) >= row->size())
        return new ParseAction(ParseAction::REJECT);

    //If null, reject. (this is a space with no other action)
    ParseAction* action = (*row)[symbolIndex];
    if (action == NULL)
        return new ParseAction(ParseAction::REJECT);

    //Otherwise, we have something, so return it
    return action;
}
2013-07-02 01:47:42 -04:00
//Parses inputString with the current parse table and returns the resulting parse tree.
//Tokens come from lexer; the state and symbol stacks drive a shift/reduce loop using getTable.
//Returns the sub tree of the symbol on top of the symbol stack when the input is accepted,
//or NULL when it is rejected. Progress is logged to std::cout.
NodeTree* Parser::parseInput(std::string inputString) {
    lexer.setInput(inputString);
    Symbol* token = lexer.next();
    ParseAction* action;

    //Start in state 0; the symbol stack gets a placeholder so both stacks stay the same height
    stateStack.push(0);
    symbolStack.push(new Symbol(" INVALID ", false));

    while (true) {
        std::cout << " In state: " << intToString(stateStack.top()) << std::endl;
        action = getTable(stateStack.top(), token);
        switch (action->action) {
            case ParseAction::REDUCE:
            {
                std::cout << " Reduce by " << action->reduceRule->toString() << std::endl;

                //Pop one state and one symbol per right-side symbol of the rule,
                //keeping track of the symbols popped for the parse tree
                int rightSideLength = action->reduceRule->getRightSide().size();
                std::vector<Symbol*> poppedSymbols;
                for (int i = 0; i < rightSideLength; i++) {
                    poppedSymbols.push_back(symbolStack.top());
                    stateStack.pop();
                    symbolStack.pop();
                }
                std::reverse(poppedSymbols.begin(), poppedSymbols.end()); //To put in order

                //Assign the new tree to the new Symbol
                Symbol* newSymbol = action->reduceRule->getLeftSide()->clone();
                newSymbol->setSubTree(reduceTreeCombine(newSymbol, poppedSymbols));
                symbolStack.push(newSymbol);

                //The goto for the reduced symbol is stored as a SHIFT entry. Anything else means there
                //is no goto, and its shiftState must not be used as the next state.
                ParseAction* gotoAction = getTable(stateStack.top(), symbolStack.top());
                if (gotoAction->action != ParseAction::SHIFT) {
                    std::cout << " REJECTED! " << std::endl;
                    std::cout << "No goto after reducing to " << newSymbol->toString() << std::endl;
                    return NULL;
                }
                stateStack.push(gotoAction->shiftState);
                break;
            }
            case ParseAction::SHIFT:
                std::cout << " Shift " << token->toString() << std::endl;
                symbolStack.push(token);
                token = lexer.next();
                stateStack.push(action->shiftState);
                break;
            case ParseAction::ACCEPT:
                std::cout << " ACCEPTED! " << std::endl;
                return symbolStack.top()->getSubTree();
            case ParseAction::REJECT:
                std::cout << " REJECTED! " << std::endl;
                std::cout << " REJECTED Symbol was " << token->toString() << std::endl;
                return NULL;
        }
    }
}
2013-05-30 19:49:19 -04:00
//Builds the parse tree node for a reduction.
//  newSymbol - the symbol being reduced to; becomes the root of the new tree
//  symbols   - the symbols popped off the stack, in rule order; each one contributes one child
//Terminals get a fresh leaf node, non-terminals contribute the sub tree they already carry.
NodeTree* Parser::reduceTreeCombine(Symbol* newSymbol, std::vector<Symbol*> &symbols) {
    NodeTree* root = new NodeTree(newSymbol->getName(), newSymbol);
    for (std::vector<Symbol*>::size_type pos = 0; pos < symbols.size(); pos++) {
        Symbol* child = symbols[pos];
        if (!child->isTerminal()) {
            root->addChild(child->getSubTree());
            continue;
        }
        root->addChild(new NodeTree(child->getName(), child));
    }
    return root;
}
2013-05-20 19:34:15 -04:00
std : : string Parser : : grammerToString ( ) {
//Iterate through the vector, adding string representation of each grammer rule
std : : cout < < " About to toString \n " ;
std : : string concat = " " ;
for ( int i = 0 ; i < loadedGrammer . size ( ) ; i + + ) {
2013-05-20 22:59:57 -04:00
concat + = loadedGrammer [ i ] - > toString ( ) + " \n " ;
2013-05-20 19:34:15 -04:00
}
return ( concat ) ;
}
2013-05-20 22:59:57 -04:00
std : : string Parser : : grammerToDOT ( ) {
//Iterate through the vector, adding DOT representation of each grammer rule
2013-05-30 19:49:19 -04:00
//std::cout << "About to DOT export\n";
2013-05-20 22:59:57 -04:00
std : : string concat = " " ;
for ( int i = 0 ; i < loadedGrammer . size ( ) ; i + + ) {
concat + = loadedGrammer [ i ] - > toDOT ( ) ;
}
return ( " digraph Kraken_Grammer { \n " + concat + " } " ) ;
}