2013-06-13 14:25:10 -04:00
# include "Lexer.h"
2013-10-26 23:05:25 -07:00
# include <cassert>
2013-06-13 14:25:10 -04:00
// Default constructor: creates a lexer whose scan position is at offset 0.
// No input text is assigned here; see setInput().
Lexer::Lexer()
    : currentPosition(0) {
}
// Constructs a lexer over the given text.
//   inputString - the text to tokenize; it is copied into the lexer.
// Scanning starts at offset 0.
Lexer::Lexer(std::string inputString)
    : currentPosition(0) {
    input = inputString;
}
// Destructor: releases every RegEx that addRegEx() allocated with `new`.
// Previously nothing was freed, so each Lexer leaked all of its patterns.
//
// NOTE(review): the class declaration is not visible from this file. If Lexer
// objects are ever copied, the copies would share these raw pointers and
// delete them twice - confirm that copying is disabled or never happens.
Lexer::~Lexer() {
    for (std::vector<RegEx*>::size_type i = 0; i < regExs.size(); i++) {
        delete regExs[i];
    }
    // Drop the now-dangling pointers so nothing can reach them afterwards.
    regExs.clear();
}
void Lexer : : setInput ( std : : string inputString ) {
2013-07-01 22:45:33 -04:00
input = inputString ;
}
2013-07-02 01:47:42 -04:00
void Lexer : : addRegEx ( std : : string regExString ) {
2013-07-01 22:45:33 -04:00
regExs . push_back ( new RegEx ( regExString ) ) ;
2013-06-13 14:25:10 -04:00
}
2013-10-02 03:15:20 -04:00
// Produces the next token from the input.
//
// Returns:
//   - the end-of-input Symbol when currentPosition has reached the end of
//     the input;
//   - otherwise a Symbol named after the pattern whose longMatch() result on
//     the unread text is largest, carrying the consumed text as its value;
//     currentPosition is advanced past that text;
//   - a default-constructed Symbol when no pattern yields a result above -1
//     (presumably longMatch() returns -1 for "no match" - confirm in RegEx).
//     In that case currentPosition is left unchanged.
Symbol Lexer::next() {
    // Nothing left to scan.
    if (currentPosition >= input.length())
        return Symbol(" $EOF$ ", true);

    std::string unread = input.substr(currentPosition);

    RegEx* winner = NULL;
    int winnerLength = -1;
    for (std::vector<RegEx*>::iterator it = regExs.begin(); it != regExs.end(); ++it) {
        int candidateLength = (*it)->longMatch(unread);
        // Strictly greater: on equal lengths the pattern added first is kept.
        if (candidateLength > winnerLength) {
            winnerLength = candidateLength;
            winner = *it;
        }
    }

    // No pattern applied to the unread text.
    if (winner == NULL)
        return Symbol();

    // Consume the matched prefix of the unread text and step past it.
    std::string lexeme = unread.substr(0, winnerLength);
    currentPosition += winnerLength;
    return Symbol(winner->getPattern(), true, lexeme);
}
void Lexer : : test ( ) {
Symbol s ;
{
Lexer lex ;
lex . addRegEx ( " b " ) ;
lex . setInput ( " bb " ) ;
s = lex . next ( ) ;
assert ( s . getName ( ) = = " b " & & s . getValue ( ) = = " b " ) ;
s = lex . next ( ) ;
assert ( s . getName ( ) = = " b " & & s . getValue ( ) = = " b " ) ;
assert ( lex . next ( ) = = Symbol ( " $EOF$ " , true ) ) ;
}
{
Lexer lex ;
lex . addRegEx ( " a* " ) ;
lex . addRegEx ( " b " ) ;
lex . setInput ( " aaabaabb " ) ;
s = lex . next ( ) ;
assert ( s . getName ( ) = = " a* " & & s . getValue ( ) = = " aaa " ) ;
s = lex . next ( ) ;
assert ( s . getName ( ) = = " b " & & s . getValue ( ) = = " b " ) ;
s = lex . next ( ) ;
assert ( s . getName ( ) = = " a* " & & s . getValue ( ) = = " aa " ) ;
s = lex . next ( ) ;
assert ( s . getName ( ) = = " b " & & s . getValue ( ) = = " b " ) ;
s = lex . next ( ) ;
assert ( s . getName ( ) = = " b " & & s . getValue ( ) = = " b " ) ;
assert ( lex . next ( ) = = Symbol ( " $EOF$ " , true ) ) ;
}
// Test a lexer error condition.
{
Lexer lex ;
lex . addRegEx ( " a|b " ) ;
lex . setInput ( " blah " ) ;
s = lex . next ( ) ;
assert ( s . getName ( ) = = " a|b " & & s . getValue ( ) = = " b " ) ;
assert ( lex . next ( ) = = Symbol ( ) ) ;
}
2013-10-27 00:00:55 -07:00
// Lexer can consume all the input at once.
{
Lexer lex ;
lex . addRegEx ( " xyzzy " ) ;
lex . setInput ( " xyzzy " ) ;
s = lex . next ( ) ;
assert ( s . getName ( ) = = " xyzzy " & & s . getValue ( ) = = " xyzzy " ) ;
assert ( lex . next ( ) = = Symbol ( " $EOF$ " , true ) ) ;
}
2013-10-26 23:05:25 -07:00
std : : cout < < " Lexer tests passed \n " ;
}