From 91f801d14f8567088eb77a3a98898140689426e8 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Mon, 29 Jun 2015 01:03:51 -0400 Subject: [PATCH] Improved the lexer to be functionally equivalent to the C++ version and ported the tests, commented out the dot generation from Import as it was slowing things down significantly. --- src/Importer.cpp | 8 +++--- stdlib/lexer.krak | 9 ++++++- stdlib/symbol.krak | 22 +++++++++++----- tests/test_lexer.expected_results | 20 ++++++++++++++ tests/test_lexer.krak | 43 ++++++++++++++++++++++++++++++- 5 files changed, 90 insertions(+), 12 deletions(-) diff --git a/src/Importer.cpp b/src/Importer.cpp index 1c60730..9f450fa 100644 --- a/src/Importer.cpp +++ b/src/Importer.cpp @@ -130,7 +130,7 @@ void Importer::import(std::string fileName) { return; } if (i.ast) { - outFileAST << i.ast->DOTGraphString() << std::endl; + //outFileAST << i.ast->DOTGraphString() << std::endl; } else { std::cout << "Tree returned from ASTTransformation for " << fileName << " is NULL!" << std::endl; } @@ -186,8 +186,8 @@ NodeTree* Importer::parseAndTrim(std::string fileName) { NodeTree* parseTree = parser->parseInput(programInputFileString); if (parseTree) { - //std::cout << parseTree->DOTGraphString() << std::endl; - outFile << parseTree->DOTGraphString() << std::endl; + //std::cout << parseTree->DOTGraphString() << std::endl; + //outFile << parseTree->DOTGraphString() << std::endl; } else { std::cout << "ParseTree returned from parser for " << fileName << " is NULL!" << std::endl; outFile.close(); outFileTransformed.close(); @@ -207,7 +207,7 @@ NodeTree* Importer::parseAndTrim(std::string fileName) { parseTree = CollapseTransformation(collapseSymbols[i]).transform(parseTree); if (parseTree) { - outFileTransformed << parseTree->DOTGraphString() << std::endl; + //outFileTransformed << parseTree->DOTGraphString() << std::endl; } else { std::cout << "Tree returned from transformation is NULL!" 
<< std::endl; } diff --git a/stdlib/lexer.krak b/stdlib/lexer.krak index bde32ff..e30a371 100644 --- a/stdlib/lexer.krak +++ b/stdlib/lexer.krak @@ -30,16 +30,23 @@ obj lexer { fun add_regex(newOne: regex::regex) { regs.add(newOne) } + fun add_regex(newOne: char*) { + regs.add(regex::regex(newOne)) + } fun set_input(in: string::string) { input = in } fun next(): symbol::symbol { + if (position >= input.length()) + return symbol::symbol("$EOF$", true) var max = regs.map(fun(reg: regex::regex): util::pair { return util::make_pair(reg.long_match(input.slice(position, -1)), reg.regexString); }) .max(fun(first: util::pair, second: util::pair): bool { return first.first < second.first; }) + if (max.first < 0) + return symbol::symbol("$INVALID$", true) position += max.first - return symbol::symbol(input.slice(position-max.first, position), max.second, true) + return symbol::symbol(max.second, true, input.slice(position-max.first, position)) } } diff --git a/stdlib/symbol.krak b/stdlib/symbol.krak index a7c318b..65f04de 100644 --- a/stdlib/symbol.krak +++ b/stdlib/symbol.krak @@ -1,12 +1,22 @@ import string -fun symbol(dataIn: char*, nameIn: char*, terminalIn: bool): symbol { - var toRet.construct(string::string(dataIn), string::string(nameIn), terminalIn): symbol +fun symbol(nameIn: char*, terminalIn: bool): symbol { + var toRet.construct(string::string(nameIn), terminalIn, string::string("no_value")): symbol return toRet } -fun symbol(dataIn: string::string, nameIn: string::string, terminalIn: bool): symbol { - var toRet.construct(dataIn, nameIn, terminalIn): symbol +fun symbol(nameIn: string::string, terminalIn: bool): symbol { + var toRet.construct(nameIn, terminalIn, string::string("no_value")): symbol + return toRet +} + +fun symbol(nameIn: char*, terminalIn: bool, dataIn: char*): symbol { + var toRet.construct(string::string(nameIn), terminalIn, string::string(dataIn)): symbol + return toRet +} + +fun symbol(nameIn: string::string, terminalIn: bool, dataIn: 
string::string): symbol { + var toRet.construct(nameIn, terminalIn, dataIn): symbol return toRet } @@ -20,10 +30,10 @@ obj symbol { name.construct() return this } - fun construct(dataIn: string::string, nameIn: string::string, terminalIn: bool): symbol* { - data.construct(dataIn) + fun construct(nameIn: string::string, terminalIn: bool, dataIn: string::string): symbol* { name.construct(nameIn) terminal = terminalIn + data.construct(dataIn) return this } fun destruct() { diff --git a/tests/test_lexer.expected_results b/tests/test_lexer.expected_results index 40e88f8..b2bc90f 100644 --- a/tests/test_lexer.expected_results +++ b/tests/test_lexer.expected_results @@ -1,2 +1,22 @@ a+: aaaa true test: test true +old contributed tests +b: b true +b: b true +$EOF$: no_value true + +a*: aaa true +b: b true +a*: aa true +b: b true +b: b true +$EOF$: no_value true + +a|b: b true +$INVALID$: no_value true + +xyzzy: xyzzy true +$EOF$: no_value true + +(i|n|t|e)+: intent true +$EOF$: no_value true diff --git a/tests/test_lexer.krak b/tests/test_lexer.krak index 0c3fac8..9ba919a 100644 --- a/tests/test_lexer.krak +++ b/tests/test_lexer.krak @@ -3,14 +3,55 @@ import regex:* import string:* import symbol:* import io:* +import util:* fun main(): int { var lex.construct(): lexer lex.set_input(string("aaaatesta")) lex.add_regex(regex("a+")) - lex.add_regex(regex("test")) + lex.add_regex("test") println(lex.next().to_string()) println(lex.next().to_string()) + + println("old contributed tests") + + { + var lex.construct(): lexer + lex.add_regex("b") + lex.set_input(string("bb")) + range(3).for_each(fun(i: int) { println(lex.next().to_string()); } ) + } + println() + { + var lex.construct(): lexer + lex.add_regex("a*") + lex.add_regex("b") + lex.set_input(string("aaabaabb")) + range(6).for_each(fun(i: int) { println(lex.next().to_string()); } ) + } + println() + { + var lex.construct(): lexer + lex.add_regex("a|b") + lex.set_input(string("blah")) + range(2).for_each(fun(i: int) { 
println(lex.next().to_string()); } ) + } + println() + { + var lex.construct(): lexer + lex.add_regex("xyzzy") + lex.set_input(string("xyzzy")) + range(2).for_each(fun(i: int) { println(lex.next().to_string()); } ) + } + println() + { + var lex.construct(): lexer + lex.add_regex("int") + lex.add_regex("(i|n|t|e)+") + lex.set_input(string("intent")) + range(2).for_each(fun(i: int) { println(lex.next().to_string()); } ) + } + return 0 }