From 08baae465a5a0d090364f84dc176d6258c98c0a5 Mon Sep 17 00:00:00 2001 From: Jason Orendorff Date: Fri, 25 Oct 2013 02:04:22 -0700 Subject: [PATCH 1/5] Add unit tests for StringReader. You can run them with `./kraken --test`. --- include/StringReader.h | 2 ++ main.cpp | 8 ++++++-- src/StringReader.cpp | 46 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/include/StringReader.h b/include/StringReader.h index 6e79a5d..f25bfec 100644 --- a/include/StringReader.h +++ b/include/StringReader.h @@ -16,6 +16,8 @@ class StringReader std::string line(bool truncateEnd = true); std::string getTokens(std::vector get_chars, bool truncateEnd = true); std::string truncateEnd(std::string to_truncate); + + static void test(); protected: private: std::string rd_string; diff --git a/main.cpp b/main.cpp index 275c452..13d60c9 100644 --- a/main.cpp +++ b/main.cpp @@ -17,7 +17,11 @@ int main(int argc, char* argv[]) { - + if (argc == 2 && std::string(argv[1]) == "--test") { + StringReader::test(); + return 0; + } + std::ifstream programInFile, grammerInFile; std::ofstream outFile, outFileTransformed, outFileAST; @@ -144,4 +148,4 @@ int main(int argc, char* argv[]) { return(0); } - \ No newline at end of file + diff --git a/src/StringReader.cpp b/src/StringReader.cpp index 2675a9a..70fbced 100644 --- a/src/StringReader.cpp +++ b/src/StringReader.cpp @@ -1,4 +1,5 @@ #include "StringReader.h" +#include StringReader::StringReader() { @@ -144,3 +145,48 @@ std::string StringReader::truncateEnd(std::string to_truncate) to_return = to_return + to_truncate[i]; return to_return; } + +void StringReader::test() +{ + { + StringReader reader("\"x\""); + assert(reader.word() == "\"x\""); + assert(reader.word() == ""); + } + + { + StringReader reader("\"y\" ;\n"); + assert(reader.word() == "\"y\""); + assert(reader.word() == ";"); + assert(reader.word() == ""); + } + + { + StringReader reader("Goal = greeting ;\n" + "greeting = \"hello\" | greeting \"world\" ;\n"); + assert(reader.word() == "Goal"); + assert(reader.word() == "="); + assert(reader.word() == "greeting"); + assert(reader.word() == ";"); + assert(reader.word() == "greeting"); + assert(reader.word() == "="); + assert(reader.word() == "\"hello\""); + assert(reader.word() == "|"); + assert(reader.word() == "greeting"); + assert(reader.word() == "\"world\""); + assert(reader.word() == ";"); + assert(reader.word() == ""); + } + + { + StringReader reader("one # pretend this is a comment\n" + " two\n"); + assert(reader.word() == "one"); + assert(reader.word() == "#"); + assert(reader.line() == "pretend this is a comment"); + assert(reader.word() == "two"); + assert(reader.word() == ""); + } + + std::cout << "StringReader tests pass\n"; +} From a18b4f41a73963a5bf6f0d7062bb973bef3e2be9 Mon Sep 17 00:00:00 2001 From: Jason Orendorff Date: Fri, 25 Oct 2013 02:17:02 -0700 Subject: [PATCH 2/5] Add tests for parsing quoted strings. --- src/StringReader.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/StringReader.cpp b/src/StringReader.cpp index 70fbced..3a887fe 100644 --- a/src/StringReader.cpp +++ b/src/StringReader.cpp @@ -188,5 +188,29 @@ void StringReader::test() assert(reader.word() == ""); } + { + // Quoted strings can span lines. + StringReader reader("x = \"\n \" ;\n"); + assert(reader.word() == "x"); + assert(reader.word() == "="); + assert(reader.word() == "\"\n \""); + assert(reader.word() == ";"); + assert(reader.word() == ""); + } + + { + // Strings may contain backslash-escaped quote characters. + StringReader reader( "\"abc\\\"def\\\\\\\\\\\" \"\n"); + assert(reader.word() == "\"abc\\\"def\\\\\\\\\\\" \""); + assert(reader.word() == ""); + } + + { + // A backslash-escaped backslash can be the last character in a string. + StringReader reader( "\"\\\\\" \n"); + assert(reader.word() == "\"\\\\\""); + assert(reader.word() == ""); + } + std::cout << "StringReader tests pass\n"; } From 64a405cab14087bec410b49acf8e9740db94c2b5 Mon Sep 17 00:00:00 2001 From: Jason Orendorff Date: Fri, 25 Oct 2013 02:26:31 -0700 Subject: [PATCH 3/5] Replace some code in StringReader::getTokens() with std::string::find_first_of(). Since this makes found_pos unsigned, a few comparisons involving found_pos must be adjusted (unsigned integers are always >= 0). --- include/StringReader.h | 2 +- src/StringReader.cpp | 40 ++++++++-------------------------------- 2 files changed, 9 insertions(+), 33 deletions(-) diff --git a/include/StringReader.h b/include/StringReader.h index f25bfec..52d2358 100644 --- a/include/StringReader.h +++ b/include/StringReader.h @@ -14,7 +14,7 @@ class StringReader void setString(std::string inputString); std::string word(bool truncateEnd = true); std::string line(bool truncateEnd = true); - std::string getTokens(std::vector get_chars, bool truncateEnd = true); + std::string getTokens(const char *get_chars, bool truncateEnd = true); std::string truncateEnd(std::string to_truncate); static void test(); diff --git a/src/StringReader.cpp b/src/StringReader.cpp index 3a887fe..638e1df 100644 --- a/src/StringReader.cpp +++ b/src/StringReader.cpp @@ -25,47 +25,22 @@ void StringReader::setString(std::string inputString) std::string StringReader::word(bool truncateEnd) { - std::vector stop_chars; - stop_chars.push_back(" "); - stop_chars.push_back("\n"); - stop_chars.push_back("\t"); - - - std::string result = getTokens(stop_chars, truncateEnd); + std::string result = getTokens(" \n\t", truncateEnd); while (result == " " || result == "\n" || result == "\t") { - result = getTokens(stop_chars, truncateEnd); + result = getTokens(" \n\t", truncateEnd); } return(result); } std::string StringReader::line(bool truncateEnd) { - std::vector stop_chars; - stop_chars.push_back("\n"); - return getTokens(stop_chars, truncateEnd); + return getTokens("\n", truncateEnd); } -std::string StringReader::getTokens(std::vector stop_chars, bool truncateEnd) +std::string StringReader::getTokens(const char *stop_chars, bool truncateEnd) { - int found_pos, new_found_pos; - std::string stop_char; - - found_pos = rd_string.find(stop_chars[0], str_pos); - stop_char = stop_chars[0]; - - for (unsigned int i = 1; i < stop_chars.size(); i++) - { - new_found_pos = rd_string.find(stop_chars[i], str_pos); - - //Ok, if the position we found is closer than what we have and is not the end of file, OR the position we are at is the end of file - //assign the new found position to the currrent found position - if ( ((new_found_pos <= found_pos) && (new_found_pos != std::string::npos)) || found_pos == std::string::npos ) - { - found_pos = new_found_pos; - stop_char = stop_chars[i]; - } - } + size_t found_pos = rd_string.find_first_of(stop_chars, str_pos); if (rd_string[str_pos] == '\"') { //See if we have an even or odd number of backslashes (that is, this quote is not or is escaped) @@ -82,7 +57,7 @@ std::string StringReader::getTokens(std::vector stop_chars, bool tr //Check to see if the quote is escaped numBackslashes = 0; countBack = 1; - while (found_pos-countBack >= 0 && rd_string[found_pos-countBack] == '\\') { + while (found_pos >= countBack && rd_string[found_pos-countBack] == '\\') { numBackslashes++; countBack++; } @@ -93,7 +68,7 @@ std::string StringReader::getTokens(std::vector stop_chars, bool tr //Check to see if it's escaped numBackslashes = 0; countBack = 1; - while (found_pos-countBack >= 0 && rd_string[found_pos-countBack] == '\\') { + while (found_pos >= countBack && rd_string[found_pos-countBack] == '\\') { numBackslashes++; countBack++; } @@ -103,6 +78,7 @@ std::string StringReader::getTokens(std::vector stop_chars, bool tr if (found_pos == str_pos) //We are at the endline { + std::string stop_char(1, rd_string[str_pos]); str_pos++; return stop_char; } else if (found_pos == std::string::npos) //We are at the end of the file From 727529fe0b65e28f6f00c674842405923c77f1b8 Mon Sep 17 00:00:00 2001 From: Jason Orendorff Date: Fri, 25 Oct 2013 02:32:39 -0700 Subject: [PATCH 4/5] Remove one unnecessary backslash-check in StringReader::getTokens(). (This patch looks like it's removing the last backslash-counting loop, but really it removes the first one and then reindents everything.) --- src/StringReader.cpp | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/src/StringReader.cpp b/src/StringReader.cpp index 638e1df..bd1f042 100644 --- a/src/StringReader.cpp +++ b/src/StringReader.cpp @@ -43,36 +43,26 @@ std::string StringReader::getTokens(const char *stop_chars, bool truncateEnd) size_t found_pos = rd_string.find_first_of(stop_chars, str_pos); if (rd_string[str_pos] == '\"') { - //See if we have an even or odd number of backslashes (that is, this quote is not or is escaped) + //Find the next quote + found_pos = rd_string.find("\"", str_pos+1); + //Check to see if the quote is escaped int numBackslashes = 0; int countBack = 1; - while (str_pos-countBack >= 0 && rd_string[str_pos-countBack] == '\\') { + while (found_pos >= countBack && rd_string[found_pos-countBack] == '\\') { numBackslashes++; countBack++; } - //If the quote is not escaped - if (numBackslashes % 2 == 0) { - //Find the next quote - found_pos = rd_string.find("\"", str_pos+1); - //Check to see if the quote is escaped + //While the quote is escaped + while (numBackslashes % 2 == 1) { + //find the next quote + found_pos = rd_string.find("\"", found_pos+1); + //Check to see if it's escaped numBackslashes = 0; countBack = 1; while (found_pos >= countBack && rd_string[found_pos-countBack] == '\\') { numBackslashes++; countBack++; } - //While the quote is escaped - while (numBackslashes % 2 == 1) { - //find the next quote - found_pos = rd_string.find("\"", found_pos+1); - //Check to see if it's escaped - numBackslashes = 0; - countBack = 1; - while (found_pos >= countBack && rd_string[found_pos-countBack] == '\\') { - numBackslashes++; - countBack++; - } - } } } From 83af1b1c5a5f03491bd49f7673fd2ec5e4e68472 Mon Sep 17 00:00:00 2001 From: Jason Orendorff Date: Fri, 25 Oct 2013 02:42:12 -0700 Subject: [PATCH 5/5] Use std::string::substr() instead of adding one character at a time. This patch also removes a few unused util functions that are easily written using substr(). --- include/util.h | 4 +--- src/StringReader.cpp | 22 ++-------------------- src/util.cpp | 15 --------------- 3 files changed, 3 insertions(+), 38 deletions(-) diff --git a/include/util.h b/include/util.h index 2aa779d..2041601 100644 --- a/include/util.h +++ b/include/util.h @@ -10,8 +10,6 @@ #include std::string intToString(int theInt); -std::string truncateEnd(std::string to_truncate); -std::string removeBeginning(std::string to_remove); std::string replaceExEscape(std::string first, std::string search, std::string replace); -#endif \ No newline at end of file +#endif diff --git a/src/StringReader.cpp b/src/StringReader.cpp index bd1f042..29772b4 100644 --- a/src/StringReader.cpp +++ b/src/StringReader.cpp @@ -85,18 +85,8 @@ std::string StringReader::getTokens(const char *stop_chars, bool truncateEnd) if (rd_string[str_pos] == '\"') found_pos++; - std::string string_section; - - for (; str_pos <= found_pos; str_pos++) - { - string_section += rd_string[str_pos]; - } - - // if (str_pos <= found_pos) { - // string_section = rd_string.substr(str_pos, found_pos+1); - // str_pos = found_pos+1; - // } - // std::cout << string_section << " - " << str_pos << " - " << found_pos << std::endl; + std::string string_section = rd_string.substr(str_pos, found_pos - str_pos + 1); + str_pos = found_pos + 1; if (truncateEnd) //Ok, we didn't add the last char, but str_pos now points at that char. So we move it one ahead. str_pos++; @@ -104,14 +94,6 @@ std::string StringReader::getTokens(const char *stop_chars, bool truncateEnd) } } -std::string StringReader::truncateEnd(std::string to_truncate) -{ - std::string to_return = ""; - for (unsigned int i = 0; i < to_truncate.length()-1; i++) - to_return = to_return + to_truncate[i]; - return to_return; -} - void StringReader::test() { { diff --git a/src/util.cpp b/src/util.cpp index 5b298b5..b71e6b0 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -5,21 +5,6 @@ std::string intToString(int theInt) { converter << theInt; return converter.str(); } -std::string truncateEnd(std::string to_truncate) -{ - std::string to_return = ""; - for (unsigned int i = 0; i < to_truncate.length()-1; i++) - to_return = to_return + to_truncate[i]; - return to_return; -} - -std::string removeBeginning(std::string to_remove) -{ - std::string to_return = ""; - for (unsigned int i = 1; i < to_remove.length(); i++) - to_return = to_return + to_remove[i]; - return to_return; -} std::string replaceExEscape(std::string first, std::string search, std::string replace) { size_t pos = 0;