2013-05-20 19:34:15 -04:00
# include "Parser.h"
// Constructs a Parser. No explicit initialisation is performed in this body.
Parser::Parser() {}
// Destroys a Parser. The body is empty: objects created with `new` elsewhere
// in this file are not released here.
Parser::~Parser() {}
// Returns the Symbol registered under symbolString, creating and registering
// one first when the name has not been seen before.
//
// symbolString: key used in the `symbols` map and passed to the new Symbol.
// isTerminal:   only used when a new Symbol has to be created; an existing
//               entry is returned unchanged.
Symbol* Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) {
    // Already registered: hand back the stored instance.
    if (symbols.find(symbolString) != symbols.end())
        return symbols[symbolString];

    // First sighting: allocate and remember it.
    Symbol* created = new Symbol(symbolString, isTerminal);
    symbols[symbolString] = created;
    return created;
}
void Parser : : loadGrammer ( std : : string grammerInputString ) {
reader . setString ( grammerInputString ) ;
std : : string currToken = reader . word ( ) ;
while ( currToken ! = " " ) {
//Load the left of the rule
ParseRule * currentRule = new ParseRule ( ) ;
Symbol * leftSide = getOrAddSymbol ( currToken , false ) ; //Left handle is never a terminal
currentRule - > setLeftHandle ( leftSide ) ;
reader . word ( ) ; //Remove the =
//Add the right side, adding new Symbols to symbol map.
currToken = reader . word ( ) ;
while ( currToken ! = " ; " ) {
currentRule - > appendToRight ( getOrAddSymbol ( currToken , currToken . at ( 0 ) = = ' \" ' ) ) ; //If first character is a ", then is a terminal
currToken = reader . word ( ) ;
//If there are multiple endings to this rule, finish this rule and start a new one with same left handle
if ( currToken = = " | " ) {
loadedGrammer . push_back ( currentRule ) ;
currentRule = new ParseRule ( ) ;
currentRule - > setLeftHandle ( leftSide ) ;
currToken = reader . word ( ) ;
}
}
//Add new rule to grammer
loadedGrammer . push_back ( currentRule ) ;
//Get next token
currToken = reader . word ( ) ;
}
std : : cout < < " Parsed! \n " ;
}
2013-05-24 00:00:41 -04:00
void Parser : : createStateSet ( ) {
2013-05-24 13:24:33 -04:00
std : : cout < < " Begining creation of stateSet " < < std : : endl ;
2013-05-26 22:12:47 -04:00
stateSets . push_back ( new State ( 0 , loadedGrammer [ 0 ] ) ) ;
2013-05-30 19:49:19 -04:00
//std::cout << "Begining for main set for loop" << std::endl;
2013-05-26 22:12:47 -04:00
for ( std : : vector < State * > : : size_type i = 0 ; i < stateSets . size ( ) ; i + + ) {
2013-06-04 19:50:16 -04:00
//closure
2013-05-24 13:24:33 -04:00
closure ( stateSets [ i ] ) ;
2013-06-04 19:50:16 -04:00
//Add the new states
addStates ( & stateSets , stateSets [ i ] ) ;
2013-05-24 00:00:41 -04:00
}
}
2013-05-26 22:12:47 -04:00
void Parser : : closure ( State * state ) {
2013-05-24 00:00:41 -04:00
//Add all the applicable rules.
2013-05-30 19:49:19 -04:00
//std::cout << "Closure on " << state->toString() << " is" << std::endl;
2013-05-26 22:12:47 -04:00
for ( std : : vector < ParseRule * > : : size_type i = 0 ; i < state - > getTotal ( ) - > size ( ) ; i + + ) {
2013-05-24 13:24:33 -04:00
for ( std : : vector < ParseRule * > : : size_type j = 0 ; j < loadedGrammer . size ( ) ; j + + ) {
2013-05-26 22:12:47 -04:00
//If the current symbol in the rule is not null (rule completed) and it equals a grammer's left side
if ( ( * state - > getTotal ( ) ) [ i ] - > getAtNextIndex ( ) ! = NULL & & * ( ( * state - > getTotal ( ) ) [ i ] - > getAtNextIndex ( ) ) = = * ( loadedGrammer [ j ] - > getLeftSide ( ) ) ) {
2013-05-30 19:49:19 -04:00
//std::cout << (*state->getTotal())[i]->getAtNextIndex()->toString() << " has an applicable production " << loadedGrammer[j]->toString() << std::endl;
2013-05-24 00:00:41 -04:00
//Check to make sure not already in
bool isAlreadyInState = false ;
2013-05-26 22:12:47 -04:00
for ( std : : vector < ParseRule * > : : size_type k = 0 ; k < state - > getTotal ( ) - > size ( ) ; k + + ) {
if ( ( * state - > getTotal ( ) ) [ k ] = = loadedGrammer [ j ] ) {
2013-05-24 00:00:41 -04:00
isAlreadyInState = true ;
break ;
}
}
if ( ! isAlreadyInState )
2013-05-26 22:12:47 -04:00
state - > remaining . push_back ( loadedGrammer [ j ] ) ;
2013-05-24 00:00:41 -04:00
}
}
}
2013-05-30 19:49:19 -04:00
//std::cout << state->toString() << std::endl;
2013-05-24 00:00:41 -04:00
}
//Adds state if it doesn't already exist.
2013-06-04 19:50:16 -04:00
void Parser : : addStates ( std : : vector < State * > * stateSets , State * state ) {
2013-05-26 22:12:47 -04:00
std : : vector < State * > newStates ;
2013-05-24 00:00:41 -04:00
//For each rule in the state we already have
2013-06-04 19:50:16 -04:00
std : : vector < ParseRule * > * currStateTotal = state - > getTotal ( ) ;
for ( std : : vector < ParseRule * > : : size_type i = 0 ; i < currStateTotal - > size ( ) ; i + + ) {
2013-05-24 00:00:41 -04:00
//Clone the current rule
2013-06-04 19:50:16 -04:00
ParseRule * advancedRule = ( * currStateTotal ) [ i ] - > clone ( ) ;
//Try to advance the pointer, if sucessful see if it is the correct next symbol
2013-05-24 00:00:41 -04:00
if ( advancedRule - > advancePointer ( ) ) {
2013-05-26 22:12:47 -04:00
//Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state
//So search our new states to see if any of them use this advanced symbol as a base.
//If so, add this rule to them.
//If not, create it.
2013-05-24 00:00:41 -04:00
bool symbolAlreadyInState = false ;
2013-05-26 22:12:47 -04:00
for ( std : : vector < State * > : : size_type j = 0 ; j < newStates . size ( ) ; j + + ) {
if ( * ( newStates [ j ] - > basis [ 0 ] - > getAtIndex ( ) ) = = * ( advancedRule - > getAtIndex ( ) ) ) {
2013-05-24 00:00:41 -04:00
symbolAlreadyInState = true ;
//So now check to see if this exact rule is in this state
2013-06-04 19:50:16 -04:00
if ( ! newStates [ j ] - > containsRule ( advancedRule ) )
2013-05-26 22:12:47 -04:00
newStates [ j ] - > basis . push_back ( advancedRule ) ;
2013-05-24 00:00:41 -04:00
//We found a state with the same symbol, so stop searching
break ;
}
}
if ( ! symbolAlreadyInState ) {
2013-05-26 22:12:47 -04:00
State * newState = new State ( stateSets - > size ( ) + newStates . size ( ) , advancedRule ) ;
2013-05-24 00:00:41 -04:00
newStates . push_back ( newState ) ;
}
}
2013-06-04 19:50:16 -04:00
//Also add any completed rules as reduces in the action table
//See if reduce
//Also, this really only needs to be done for the state's basis, but we're already iterating through, so...
if ( ( * currStateTotal ) [ i ] - > isAtEnd ( ) ) {
std : : cout < < ( * currStateTotal ) [ i ] - > toString ( ) < < " is at end, adding reduce to table " < < std : : endl ;
//This should iterate through the follow set, but right now is LR(0), so all symbols
for ( std : : vector < Symbol * > : : size_type j = 0 ; j < symbolIndexVec . size ( ) ; j + + )
addToTable ( state , symbolIndexVec [ j ] , new ParseAction ( ParseAction : : REDUCE , ( * currStateTotal ) [ i ] ) ) ;
} else {
std : : cout < < ( * currStateTotal ) [ i ] - > toString ( ) < < " is NOT at end " < < std : : endl ;
}
2013-05-24 00:00:41 -04:00
}
2013-05-26 22:12:47 -04:00
//Put all our new states in the set of states only if they're not already there.
bool stateAlreadyInAllStates = false ;
2013-06-04 19:50:16 -04:00
Symbol * currStateSymbol ;
2013-05-26 22:12:47 -04:00
for ( std : : vector < State * > : : size_type i = 0 ; i < newStates . size ( ) ; i + + ) {
2013-06-04 19:50:16 -04:00
currStateSymbol = ( * ( newStates [ i ] - > getBasis ( ) ) ) [ 0 ] - > getAtIndex ( ) ;
2013-05-26 22:12:47 -04:00
for ( std : : vector < State * > : : size_type j = 0 ; j < stateSets - > size ( ) ; j + + ) {
if ( * ( newStates [ i ] ) = = * ( ( * stateSets ) [ j ] ) ) {
stateAlreadyInAllStates = true ;
2013-06-04 19:50:16 -04:00
//If it does exist, we should add it as the shift/goto in the action table
addToTable ( state , currStateSymbol , new ParseAction ( ParseAction : : SHIFT , j ) ) ;
break ;
2013-05-26 22:12:47 -04:00
}
}
if ( ! stateAlreadyInAllStates ) {
stateSets - > push_back ( newStates [ i ] ) ;
stateAlreadyInAllStates = false ;
2013-06-04 19:50:16 -04:00
//If the state does not already exist, add it and add it as the shift/goto in the action table
addToTable ( state , currStateSymbol , new ParseAction ( ParseAction : : SHIFT , stateSets - > size ( ) - 1 ) ) ;
2013-05-26 22:12:47 -04:00
}
2013-05-24 00:00:41 -04:00
}
}
std : : string Parser : : stateSetToString ( ) {
std : : string concat = " " ;
2013-05-26 22:12:47 -04:00
for ( std : : vector < State * > : : size_type i = 0 ; i < stateSets . size ( ) ; i + + ) {
concat + = stateSets [ i ] - > toString ( ) ;
2013-05-24 00:00:41 -04:00
}
return concat ;
}
2013-06-04 19:50:16 -04:00
void Parser : : addToTable ( State * fromState , Symbol * tranSymbol , ParseAction * action ) {
//find what state num the from state is
int stateNum = - 1 ;
for ( std : : vector < State * > : : size_type i = 0 ; i < stateSets . size ( ) ; i + + ) {
if ( * ( stateSets [ i ] ) = = * fromState ) {
stateNum = i ;
break ;
2013-05-29 20:43:35 -04:00
}
}
2013-05-23 01:35:54 -04:00
2013-06-04 19:50:16 -04:00
//std::cout << "stateNum is " << stateNum << std::endl;
2013-05-30 02:12:34 -04:00
2013-06-04 19:50:16 -04:00
//If state not in table, add up to and it.
//std::cout << "table size is " << table.size() <<std::endl;
while ( stateNum > = table . size ( ) ) {
//std::cout << "Pushing back table" << std::endl;
table . push_back ( new std : : vector < ParseAction * > ) ;
}
2013-05-30 02:12:34 -04:00
2013-06-04 19:50:16 -04:00
//find out what index this symbol is on
int symbolIndex = - 1 ;
for ( std : : vector < Symbol * > : : size_type i = 0 ; i < symbolIndexVec . size ( ) ; i + + ) {
if ( * ( symbolIndexVec [ i ] ) = = * tranSymbol ) {
//Has been found
symbolIndex = i ;
break ;
2013-05-29 20:43:35 -04:00
}
2013-06-04 19:50:16 -04:00
}
//std::cout << "symbolIndex is " << symbolIndex << std::endl;
2013-05-30 02:12:34 -04:00
2013-06-04 19:50:16 -04:00
//If we've never done this symbol, add it
if ( symbolIndex < 0 ) {
// std::cout << "pushing back symbolIndexVec" <<std::endl;
symbolIndex = symbolIndexVec . size ( ) ;
symbolIndexVec . push_back ( tranSymbol ) ;
}
//std::cout << "symbolIndex is " << symbolIndex << " which is " << symbolIndexVec[symbolIndex]->toString() << std::endl;
//std::cout << table[stateNum] << " ";
while ( symbolIndex > = table [ stateNum ] - > size ( ) ) {
table [ stateNum ] - > push_back ( NULL ) ;
}
//If this table slot is empty
//std::cout << "table[stateNum] is " << table[stateNum] << std::endl;
//std::cout << "blank is " << (*(table[stateNum]))[symbolIndex] << std::endl;
if ( ( * ( table [ stateNum ] ) ) [ symbolIndex ] = = NULL ) {
std : : cout < < " Null, adding " < < action - > toString ( ) < < std : : endl ;
( * ( table [ stateNum ] ) ) [ symbolIndex ] = action ;
}
//If the slot is not empty and does not contain ourself, then it is a conflict
else if ( * ( ( * ( table [ stateNum ] ) ) [ symbolIndex ] ) ! = * action ) {
std : : cout < < " not Null! " < < std : : endl ;
std : : cout < < " Conflict between old: " < < ( * ( table [ stateNum ] ) ) [ symbolIndex ] - > toString ( ) < < " and new: " < < action - > toString ( ) < < std : : endl ;
//Don't overwrite
//(*(table[stateNum]))[symbolIndex] = action;
}
}
std : : string Parser : : tableToString ( ) {
std : : string concat = " " ;
for ( std : : vector < Symbol * > : : size_type i = 0 ; i < symbolIndexVec . size ( ) ; i + + )
concat + = " \t " + symbolIndexVec [ i ] - > toString ( ) ;
concat + = " \n " ;
for ( std : : vector < std : : vector < ParseRule * > > : : size_type i = 0 ; i < table . size ( ) ; i + + ) {
concat + = intToString ( i ) + " \t " ;
for ( std : : vector < ParseRule * > : : size_type j = 0 ; j < table [ i ] - > size ( ) ; j + + ) {
if ( ( * ( table [ i ] ) ) [ j ] ! = NULL )
concat + = ( * ( table [ i ] ) ) [ j ] - > toString ( ) + " \t " ;
else
concat + = " NULL \t " ;
2013-05-29 20:43:35 -04:00
}
2013-06-04 19:50:16 -04:00
concat + = " \n " ;
}
return ( concat ) ;
}
2013-05-30 02:12:34 -04:00
2013-06-04 19:50:16 -04:00
// Looks up the action for (state, token) in the table.
//
// state: row index into the table.
// token: symbol looked up in symbolIndexVec by equality to find the column.
//
// Returns the stored action, or a newly allocated ParseAction:
//   - ACCEPT when state is 1 and the token is not in symbolIndexVec;
//   - REJECT when the state or column is outside the table, or the cell is NULL.
ParseAction* Parser::getTable(int state, Symbol* token) {
    typedef std::vector<Symbol*>::size_type Index;

    int symbolIndex = -1;
    for (Index i = 0; i < symbolIndexVec.size(); i++) {
        if (*(symbolIndexVec[i]) == *token) {
            symbolIndex = i;
            break;
        }
    }

    //This is the accepting state, as it is the 1th's state's reduction on EOF, which is not in the symbolIndexVec
    //(This assumes singular goal assignment, a simplification for now)
    if (state == 1 && symbolIndex == -1)
        return (new ParseAction(ParseAction::ACCEPT));

    //Quick hack, since there is not right now an EOF token, we'll just check for an unrecognized character (-1) and just apply reductions as if it were the first symbol
    if (symbolIndex == -1)
        symbolIndex = 0;

    //A state with no row in the table has no actions at all: reject instead of indexing out of range
    if (state < 0 || static_cast<Index>(state) >= table.size())
        return (new ParseAction(ParseAction::REJECT));

    //If outside the symbol range of this state (same as NULL), reject
    if (static_cast<Index>(symbolIndex) >= table[state]->size())
        return (new ParseAction(ParseAction::REJECT));

    ParseAction* action = (*(table[state]))[symbolIndex];
    //If null, reject. (this is a space with no other action)
    if (action == NULL)
        return (new ParseAction(ParseAction::REJECT));

    //Otherwise, we have something, so return it
    return (action);
}
2013-05-30 19:49:19 -04:00
// Runs the table-driven shift/reduce loop over inputString.
//
// Each word from the input is wrapped in the quote literals below and turned
// into a terminal Symbol. Actions come from getTable() for the state on top
// of stateStack and the current token.
//
// Returns the sub-tree of the symbol on top of symbolStack when the input is
// accepted, or NULL when it is rejected.
//
// NOTE(review): the quote and " INVALID " literals look whitespace-padded;
// confirm against the upstream file, since the token text has to match the
// terminal names stored by loadGrammer.
NodeTree* Parser::parseInput(std::string inputString) {
    StringReader inputReader;
    inputReader.setString(inputString);
    Symbol* token = new Symbol(" \" " + inputReader.word() + " \" ", true);
    ParseAction* action;

    //Start state plus a placeholder symbol, so both stacks have the same depth
    stateStack.push(0);
    symbolStack.push(new Symbol(" INVALID ", false));

    while (true) {
        std::cout << " In state: " << intToString(stateStack.top()) << std::endl;
        action = getTable(stateStack.top(), token);
        switch (action->action) {
            case ParseAction::REDUCE:
            {
                std::cout << " Reduce by " << action->reduceRule->toString() << std::endl;
                int rightSideLength = action->reduceRule->getRightSide().size();
                //Keep track of symbols popped for parse tree
                std::vector<Symbol*> poppedSymbols;
                for (int i = 0; i < rightSideLength; i++) {
                    poppedSymbols.push_back(symbolStack.top());
                    stateStack.pop();
                    symbolStack.pop();
                }
                std::reverse(poppedSymbols.begin(), poppedSymbols.end()); //To put in order

                //Assign the new tree to the new Symbol
                Symbol* newSymbol = action->reduceRule->getLeftSide()->clone();
                newSymbol->setSubTree(reduceTreeCombine(newSymbol, poppedSymbols));
                symbolStack.push(newSymbol);
                std::cout << " top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl;

                //Goto: only a SHIFT entry carries a target state. Anything else (REJECT, ACCEPT or
                //a REDUCE) means there is no transition on the reduced symbol, so reject rather
                //than push a shiftState that was never meant to be a state number.
                ParseAction* gotoAction = getTable(stateStack.top(), symbolStack.top());
                if (gotoAction->action != ParseAction::SHIFT) {
                    std::cout << " REJECTED! " << std::endl;
                    return (NULL);
                }
                stateStack.push(gotoAction->shiftState);
                std::cout << " Reduced, now condition is " << std::endl;
                std::cout << " top of state is " << intToString(stateStack.top()) << " symbolStack top is " << symbolStack.top()->toString() << std::endl;
                break;
            }
            case ParseAction::SHIFT:
                std::cout << " Shift " << token->toString() << std::endl;
                symbolStack.push(token);
                //Fetch the next input word as the new lookahead token
                token = new Symbol(" \" " + inputReader.word() + " \" ", true);
                stateStack.push(action->shiftState);
                break;
            case ParseAction::ACCEPT:
                std::cout << " ACCEPTED! " << std::endl;
                return (symbolStack.top()->getSubTree());
                break;
            case ParseAction::REJECT:
                std::cout << " REJECTED! " << std::endl;
                return (NULL);
                break;
        }
    }
}
2013-05-30 19:49:19 -04:00
// Builds the parse-tree node for a reduction.
//
// newSymbol: symbol being reduced to; its toString() labels the new node.
// symbols:   the popped right-side symbols, in order. A terminal contributes
//            a fresh leaf labelled with its toString(); a non-terminal
//            contributes the sub-tree it already carries.
//
// Returns the newly allocated node.
NodeTree* Parser::reduceTreeCombine(Symbol* newSymbol, std::vector<Symbol*>& symbols) {
    NodeTree* parent = new NodeTree(newSymbol->toString());
    for (std::vector<Symbol*>::iterator it = symbols.begin(); it != symbols.end(); ++it) {
        Symbol* child = *it;
        NodeTree* childTree;
        if (!child->isTerminal())
            childTree = child->getSubTree();
        else
            childTree = new NodeTree(child->toString());
        parent->addChild(childTree);
    }
    return parent;
}
2013-05-20 19:34:15 -04:00
std : : string Parser : : grammerToString ( ) {
//Iterate through the vector, adding string representation of each grammer rule
std : : cout < < " About to toString \n " ;
std : : string concat = " " ;
for ( int i = 0 ; i < loadedGrammer . size ( ) ; i + + ) {
2013-05-20 22:59:57 -04:00
concat + = loadedGrammer [ i ] - > toString ( ) + " \n " ;
2013-05-20 19:34:15 -04:00
}
return ( concat ) ;
}
2013-05-20 22:59:57 -04:00
std : : string Parser : : grammerToDOT ( ) {
//Iterate through the vector, adding DOT representation of each grammer rule
2013-05-30 19:49:19 -04:00
//std::cout << "About to DOT export\n";
2013-05-20 22:59:57 -04:00
std : : string concat = " " ;
for ( int i = 0 ; i < loadedGrammer . size ( ) ; i + + ) {
concat + = loadedGrammer [ i ] - > toDOT ( ) ;
}
return ( " digraph Kraken_Grammer { \n " + concat + " } " ) ;
}