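Fix substr() bug when the Lexer consumes all the input at once: substr()'s second argument is a character count, not an end index, so passing input.length()-1 from position 0 dropped the last character of the remaining input.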
This commit is contained in:
@@ -24,13 +24,13 @@ void Lexer::addRegEx(std::string regExString) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Symbol Lexer::next() {
|
Symbol Lexer::next() {
|
||||||
//std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition << " out of " << input.length() <<std::endl;
|
//std::cout << "Current at is \"" << input.substr(currentPosition) << "\" currentPos is " << currentPosition << " out of " << input.length() <<std::endl;
|
||||||
//If we're at the end, return an eof
|
//If we're at the end, return an eof
|
||||||
if (currentPosition >= input.length())
|
if (currentPosition >= input.length())
|
||||||
return Symbol("$EOF$", true);
|
return Symbol("$EOF$", true);
|
||||||
int longestMatch = -1;
|
int longestMatch = -1;
|
||||||
RegEx* longestRegEx = NULL;
|
RegEx* longestRegEx = NULL;
|
||||||
std::string remainingString = input.substr(currentPosition,input.length()-1);
|
std::string remainingString = input.substr(currentPosition);
|
||||||
for (std::vector<RegEx*>::size_type i = 0; i < regExs.size(); i++) {
|
for (std::vector<RegEx*>::size_type i = 0; i < regExs.size(); i++) {
|
||||||
//std::cout << "Trying regex " << regExs[i]->getPattern() << std::endl;
|
//std::cout << "Trying regex " << regExs[i]->getPattern() << std::endl;
|
||||||
int currentMatch = regExs[i]->longMatch(remainingString);
|
int currentMatch = regExs[i]->longMatch(remainingString);
|
||||||
@@ -42,11 +42,11 @@ Symbol Lexer::next() {
|
|||||||
if (longestRegEx != NULL) {
|
if (longestRegEx != NULL) {
|
||||||
std::string eatenString = input.substr(currentPosition, longestMatch);
|
std::string eatenString = input.substr(currentPosition, longestMatch);
|
||||||
currentPosition += longestMatch;
|
currentPosition += longestMatch;
|
||||||
//std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition <<std::endl;
|
//std::cout << "Current at is \"" << input.substr(currentPosition) << "\" currentPos is " << currentPosition <<std::endl;
|
||||||
return Symbol(longestRegEx->getPattern(), true, eatenString);
|
return Symbol(longestRegEx->getPattern(), true, eatenString);
|
||||||
} else {
|
} else {
|
||||||
//std::cout << "Found no applicable regex" << std::endl;
|
//std::cout << "Found no applicable regex" << std::endl;
|
||||||
//std::cout << "Remaining is ||" << input.substr(currentPosition,input.length()-1) << "||" << std::endl;
|
//std::cout << "Remaining is ||" << input.substr(currentPosition) << "||" << std::endl;
|
||||||
return Symbol();
|
return Symbol();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -92,5 +92,15 @@ void Lexer::test() {
|
|||||||
assert(lex.next() == Symbol());
|
assert(lex.next() == Symbol());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lexer can consume all the input at once.
|
||||||
|
{
|
||||||
|
Lexer lex;
|
||||||
|
lex.addRegEx("xyzzy");
|
||||||
|
lex.setInput("xyzzy");
|
||||||
|
s = lex.next();
|
||||||
|
assert(s.getName() == "xyzzy" && s.getValue() == "xyzzy");
|
||||||
|
assert(lex.next() == Symbol("$EOF$", true));
|
||||||
|
}
|
||||||
|
|
||||||
std::cout << "Lexer tests passed\n";
|
std::cout << "Lexer tests passed\n";
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user