From cc6ff21986a51ea32e2d84296bd5472ab5c90ed6 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Tue, 2 Jul 2013 13:14:40 -0400 Subject: [PATCH] Fixed some bugs, including modifying StringReader to treat quoted strings as whole words. --- include/RegExState.h | 1 + src/Lexer.cpp | 2 +- src/RegEx.cpp | 15 ++++++--------- src/RegExState.cpp | 7 ++++++- src/StringReader.cpp | 17 +++++++++++++++-- 5 files changed, 29 insertions(+), 13 deletions(-) diff --git a/include/RegExState.h b/include/RegExState.h index a843071..da35c05 100644 --- a/include/RegExState.h +++ b/include/RegExState.h @@ -11,6 +11,7 @@ class RegExState { public: RegExState(RegExState* inInnerState); RegExState(char inCharacter); + RegExState(); ~RegExState(); diff --git a/src/Lexer.cpp b/src/Lexer.cpp index c08c4b4..e854f5b 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -27,7 +27,7 @@ Symbol* Lexer::next() { //If we're at the end, return an eof if (currentPosition == input.length()-1) return new Symbol("$EOF$", false); - int longestMatch = 0; + int longestMatch = -1; RegEx* longestRegEx = NULL; std::string remainingString = input.substr(currentPosition,input.length()-1); for (std::vector::size_type i = 0; i < regExs.size(); i++) { diff --git a/src/RegEx.cpp b/src/RegEx.cpp index 22f2bbc..42dda9a 100644 --- a/src/RegEx.cpp +++ b/src/RegEx.cpp @@ -3,10 +3,10 @@ RegEx::RegEx(std::string inPattern) { pattern = inPattern; RegExState* current; - begin = new RegExState(pattern[0]); + begin = new RegExState(); current = begin; - for (int i = 1; i < pattern.length(); i++) { - RegExState* next = new RegExState(pattern.at(i)); + for (int i = 0; i < pattern.length(); i++) { + RegExState* next = new RegExState(pattern[i]); current->addNext(next); current = next; } @@ -18,14 +18,11 @@ RegEx::~RegEx() { int RegEx::longMatch(std::string stringToMatch) { //If the beginning character is wrong, exit immediantly.
Otherwise, get all the states we can get from adding the second character to the state where we accepted the first - if (!begin->characterIs(stringToMatch[0])) - return -1; - std::cout << "Matched first character: " << stringToMatch[0] << std::endl; - int lastMatch = 0; - currentStates = *(begin->advance(stringToMatch[1])); + int lastMatch = -1; + currentStates = *(begin->advance(stringToMatch[0])); std::vector nextStates; - for (int i = 2; i < stringToMatch.size(); i++) { + for (int i = 1; i < stringToMatch.size(); i++) { //Go through every current state. Check to see if it is goal, if so update last goal. //Also, add each state's advance to nextStates for (std::vector::size_type j = 0; j < currentStates.size(); j++) { diff --git a/src/RegExState.cpp b/src/RegExState.cpp index ade0522..2d8b86b 100644 --- a/src/RegExState.cpp +++ b/src/RegExState.cpp @@ -9,6 +9,11 @@ RegExState::RegExState(char inCharacter) { inner = NULL; } +RegExState::RegExState() { + character = 0; + inner = NULL; +} + RegExState::~RegExState() { //No cleanup necessary } @@ -36,7 +41,7 @@ bool RegExState::isGoal() { std::string RegExState::toString() { std::string string = ""; - string += character; + string += std::string("\"") + character + "\""; for (std::vector::size_type i = 0; i < nextStates.size(); i++) string += "->" + nextStates[i]->toString() + " EC "; //std::cout << "inner = " << inner << " nextStates size = " << nextStates.size() < stop_chars, bool tr } } + if (rd_string[str_pos] == '\"') { + found_pos = rd_string.find("\"", str_pos+1); + } + if (found_pos == str_pos) //We are at the endline { str_pos++; @@ -78,16 +82,25 @@ std::string StringReader::getTokens(std::vector stop_chars, bool tr return ""; } else { - std::string string_section; - if (truncateEnd) //If we want to get rid of the delimiting character, which is the default, don't add the last char. 
Note we have to increase str_pos by one manually later found_pos -= 1; + if (rd_string[str_pos] == '\"') + found_pos++; + + std::string string_section; + for (; str_pos <= found_pos; str_pos++) { string_section += rd_string[str_pos]; } + // if (str_pos <= found_pos) { + // string_section = rd_string.substr(str_pos, found_pos+1); + // str_pos = found_pos+1; + // } + // std::cout << string_section << " - " << str_pos << " - " << found_pos << std::endl; + if (truncateEnd) //Ok, we didn't add the last char, but str_pos now points at that char. So we move it one ahead. str_pos++; return string_section;