Fix a minor bug in the lexer (it would not match the last character of the input) and add Lexer tests.
This commit is contained in:
@@ -16,9 +16,10 @@ class Lexer {
|
|||||||
void addRegEx(std::string regExString);
|
void addRegEx(std::string regExString);
|
||||||
void setInput(std::string inputString);
|
void setInput(std::string inputString);
|
||||||
Symbol next();
|
Symbol next();
|
||||||
|
static void test();
|
||||||
private:
|
private:
|
||||||
std::vector<RegEx*> regExs;
|
std::vector<RegEx*> regExs;
|
||||||
std::string input;
|
std::string input;
|
||||||
int currentPosition;
|
int currentPosition;
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
1
main.cpp
1
main.cpp
@@ -19,6 +19,7 @@
|
|||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
if (argc == 2 && std::string(argv[1]) == "--test") {
|
if (argc == 2 && std::string(argv[1]) == "--test") {
|
||||||
StringReader::test();
|
StringReader::test();
|
||||||
|
Lexer::test();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
#include "Lexer.h"
|
#include "Lexer.h"
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
Lexer::Lexer() {
|
Lexer::Lexer() {
|
||||||
//Do nothing
|
//Do nothing
|
||||||
@@ -25,7 +26,7 @@ void Lexer::addRegEx(std::string regExString) {
|
|||||||
Symbol Lexer::next() {
|
Symbol Lexer::next() {
|
||||||
//std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition << " out of " << input.length() <<std::endl;
|
//std::cout << "Current at is \"" << input.substr(currentPosition,input.length()-1) << "\" currentPos is " << currentPosition << " out of " << input.length() <<std::endl;
|
||||||
//If we're at the end, return an eof
|
//If we're at the end, return an eof
|
||||||
if (currentPosition >= input.length()-1)
|
if (currentPosition >= input.length())
|
||||||
return Symbol("$EOF$", true);
|
return Symbol("$EOF$", true);
|
||||||
int longestMatch = -1;
|
int longestMatch = -1;
|
||||||
RegEx* longestRegEx = NULL;
|
RegEx* longestRegEx = NULL;
|
||||||
@@ -48,4 +49,48 @@ Symbol Lexer::next() {
|
|||||||
//std::cout << "Remaining is ||" << input.substr(currentPosition,input.length()-1) << "||" << std::endl;
|
//std::cout << "Remaining is ||" << input.substr(currentPosition,input.length()-1) << "||" << std::endl;
|
||||||
return Symbol();
|
return Symbol();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Lexer::test() {
|
||||||
|
Symbol s;
|
||||||
|
{
|
||||||
|
Lexer lex;
|
||||||
|
lex.addRegEx("b");
|
||||||
|
lex.setInput("bb");
|
||||||
|
s = lex.next();
|
||||||
|
assert(s.getName() == "b" && s.getValue() == "b");
|
||||||
|
s = lex.next();
|
||||||
|
assert(s.getName() == "b" && s.getValue() == "b");
|
||||||
|
assert(lex.next() == Symbol("$EOF$", true));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Lexer lex;
|
||||||
|
lex.addRegEx("a*");
|
||||||
|
lex.addRegEx("b");
|
||||||
|
lex.setInput("aaabaabb");
|
||||||
|
s = lex.next();
|
||||||
|
assert(s.getName() == "a*" && s.getValue() == "aaa");
|
||||||
|
s = lex.next();
|
||||||
|
assert(s.getName() == "b" && s.getValue() == "b");
|
||||||
|
s = lex.next();
|
||||||
|
assert(s.getName() == "a*" && s.getValue() == "aa");
|
||||||
|
s = lex.next();
|
||||||
|
assert(s.getName() == "b" && s.getValue() == "b");
|
||||||
|
s = lex.next();
|
||||||
|
assert(s.getName() == "b" && s.getValue() == "b");
|
||||||
|
assert(lex.next() == Symbol("$EOF$", true));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test a lexer error condition.
|
||||||
|
{
|
||||||
|
Lexer lex;
|
||||||
|
lex.addRegEx("a|b");
|
||||||
|
lex.setInput("blah");
|
||||||
|
s = lex.next();
|
||||||
|
assert(s.getName() == "a|b" && s.getValue() == "b");
|
||||||
|
assert(lex.next() == Symbol());
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "Lexer tests passed\n";
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user