Improved the lexer to be functionally equivalent to the C++ version and ported the tests; commented out the DOT graph generation in the Importer, as it was slowing things down significantly.
This commit is contained in:
@@ -130,7 +130,7 @@ void Importer::import(std::string fileName) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (i.ast) {
|
if (i.ast) {
|
||||||
outFileAST << i.ast->DOTGraphString() << std::endl;
|
//outFileAST << i.ast->DOTGraphString() << std::endl;
|
||||||
} else {
|
} else {
|
||||||
std::cout << "Tree returned from ASTTransformation for " << fileName << " is NULL!" << std::endl;
|
std::cout << "Tree returned from ASTTransformation for " << fileName << " is NULL!" << std::endl;
|
||||||
}
|
}
|
||||||
@@ -187,7 +187,7 @@ NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
|
|||||||
|
|
||||||
if (parseTree) {
|
if (parseTree) {
|
||||||
//std::cout << parseTree->DOTGraphString() << std::endl;
|
//std::cout << parseTree->DOTGraphString() << std::endl;
|
||||||
outFile << parseTree->DOTGraphString() << std::endl;
|
//outFile << parseTree->DOTGraphString() << std::endl;
|
||||||
} else {
|
} else {
|
||||||
std::cout << "ParseTree returned from parser for " << fileName << " is NULL!" << std::endl;
|
std::cout << "ParseTree returned from parser for " << fileName << " is NULL!" << std::endl;
|
||||||
outFile.close(); outFileTransformed.close();
|
outFile.close(); outFileTransformed.close();
|
||||||
@@ -207,7 +207,7 @@ NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
|
|||||||
parseTree = CollapseTransformation<Symbol>(collapseSymbols[i]).transform(parseTree);
|
parseTree = CollapseTransformation<Symbol>(collapseSymbols[i]).transform(parseTree);
|
||||||
|
|
||||||
if (parseTree) {
|
if (parseTree) {
|
||||||
outFileTransformed << parseTree->DOTGraphString() << std::endl;
|
//outFileTransformed << parseTree->DOTGraphString() << std::endl;
|
||||||
} else {
|
} else {
|
||||||
std::cout << "Tree returned from transformation is NULL!" << std::endl;
|
std::cout << "Tree returned from transformation is NULL!" << std::endl;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -30,16 +30,23 @@ obj lexer {
|
|||||||
fun add_regex(newOne: regex::regex) {
|
fun add_regex(newOne: regex::regex) {
|
||||||
regs.add(newOne)
|
regs.add(newOne)
|
||||||
}
|
}
|
||||||
|
fun add_regex(newOne: char*) {
|
||||||
|
regs.add(regex::regex(newOne))
|
||||||
|
}
|
||||||
fun set_input(in: string::string) {
|
fun set_input(in: string::string) {
|
||||||
input = in
|
input = in
|
||||||
}
|
}
|
||||||
fun next(): symbol::symbol {
|
fun next(): symbol::symbol {
|
||||||
|
if (position >= input.length())
|
||||||
|
return symbol::symbol("$EOF$", true)
|
||||||
var max = regs.map(fun(reg: regex::regex): util::pair<int, string::string> {
|
var max = regs.map(fun(reg: regex::regex): util::pair<int, string::string> {
|
||||||
return util::make_pair(reg.long_match(input.slice(position, -1)), reg.regexString); })
|
return util::make_pair(reg.long_match(input.slice(position, -1)), reg.regexString); })
|
||||||
.max(fun(first: util::pair<int, string::string>, second: util::pair<int, string::string>): bool
|
.max(fun(first: util::pair<int, string::string>, second: util::pair<int, string::string>): bool
|
||||||
{ return first.first < second.first; })
|
{ return first.first < second.first; })
|
||||||
|
if (max.first < 0)
|
||||||
|
return symbol::symbol("$INVALID$", true)
|
||||||
position += max.first
|
position += max.first
|
||||||
return symbol::symbol(input.slice(position-max.first, position), max.second, true)
|
return symbol::symbol(max.second, true, input.slice(position-max.first, position))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,12 +1,22 @@
|
|||||||
import string
|
import string
|
||||||
|
|
||||||
fun symbol(dataIn: char*, nameIn: char*, terminalIn: bool): symbol {
|
fun symbol(nameIn: char*, terminalIn: bool): symbol {
|
||||||
var toRet.construct(string::string(dataIn), string::string(nameIn), terminalIn): symbol
|
var toRet.construct(string::string(nameIn), terminalIn, string::string("no_value")): symbol
|
||||||
return toRet
|
return toRet
|
||||||
}
|
}
|
||||||
|
|
||||||
fun symbol(dataIn: string::string, nameIn: string::string, terminalIn: bool): symbol {
|
fun symbol(nameIn: string::string, terminalIn: bool): symbol {
|
||||||
var toRet.construct(dataIn, nameIn, terminalIn): symbol
|
var toRet.construct(nameIn, terminalIn, string::string("no_value")): symbol
|
||||||
|
return toRet
|
||||||
|
}
|
||||||
|
|
||||||
|
fun symbol(nameIn: char*, terminalIn: bool, dataIn: char*): symbol {
|
||||||
|
var toRet.construct(string::string(nameIn), terminalIn, string::string(dataIn)): symbol
|
||||||
|
return toRet
|
||||||
|
}
|
||||||
|
|
||||||
|
fun symbol(nameIn: string::string, terminalIn: bool, dataIn: string::string): symbol {
|
||||||
|
var toRet.construct(nameIn, terminalIn, dataIn): symbol
|
||||||
return toRet
|
return toRet
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -20,10 +30,10 @@ obj symbol {
|
|||||||
name.construct()
|
name.construct()
|
||||||
return this
|
return this
|
||||||
}
|
}
|
||||||
fun construct(dataIn: string::string, nameIn: string::string, terminalIn: bool): symbol* {
|
fun construct(nameIn: string::string, terminalIn: bool, dataIn: string::string): symbol* {
|
||||||
data.construct(dataIn)
|
|
||||||
name.construct(nameIn)
|
name.construct(nameIn)
|
||||||
terminal = terminalIn
|
terminal = terminalIn
|
||||||
|
data.construct(dataIn)
|
||||||
return this
|
return this
|
||||||
}
|
}
|
||||||
fun destruct() {
|
fun destruct() {
|
||||||
|
|||||||
@@ -1,2 +1,22 @@
|
|||||||
a+: aaaa true
|
a+: aaaa true
|
||||||
test: test true
|
test: test true
|
||||||
|
old contributed tests
|
||||||
|
b: b true
|
||||||
|
b: b true
|
||||||
|
$EOF$: no_value true
|
||||||
|
|
||||||
|
a*: aaa true
|
||||||
|
b: b true
|
||||||
|
a*: aa true
|
||||||
|
b: b true
|
||||||
|
b: b true
|
||||||
|
$EOF$: no_value true
|
||||||
|
|
||||||
|
a|b: b true
|
||||||
|
$INVALID$: no_value true
|
||||||
|
|
||||||
|
xyzzy: xyzzy true
|
||||||
|
$EOF$: no_value true
|
||||||
|
|
||||||
|
(i|n|t|e)+: intent true
|
||||||
|
$EOF$: no_value true
|
||||||
|
|||||||
@@ -3,14 +3,55 @@ import regex:*
|
|||||||
import string:*
|
import string:*
|
||||||
import symbol:*
|
import symbol:*
|
||||||
import io:*
|
import io:*
|
||||||
|
import util:*
|
||||||
|
|
||||||
fun main(): int {
|
fun main(): int {
|
||||||
var lex.construct(): lexer
|
var lex.construct(): lexer
|
||||||
lex.set_input(string("aaaatesta"))
|
lex.set_input(string("aaaatesta"))
|
||||||
lex.add_regex(regex("a+"))
|
lex.add_regex(regex("a+"))
|
||||||
lex.add_regex(regex("test"))
|
lex.add_regex("test")
|
||||||
println(lex.next().to_string())
|
println(lex.next().to_string())
|
||||||
println(lex.next().to_string())
|
println(lex.next().to_string())
|
||||||
|
|
||||||
|
println("old contributed tests")
|
||||||
|
|
||||||
|
{
|
||||||
|
var lex.construct(): lexer
|
||||||
|
lex.add_regex("b")
|
||||||
|
lex.set_input(string("bb"))
|
||||||
|
range(3).for_each(fun(i: int) { println(lex.next().to_string()); } )
|
||||||
|
}
|
||||||
|
println()
|
||||||
|
{
|
||||||
|
var lex.construct(): lexer
|
||||||
|
lex.add_regex("a*")
|
||||||
|
lex.add_regex("b")
|
||||||
|
lex.set_input(string("aaabaabb"))
|
||||||
|
range(6).for_each(fun(i: int) { println(lex.next().to_string()); } )
|
||||||
|
}
|
||||||
|
println()
|
||||||
|
{
|
||||||
|
var lex.construct(): lexer
|
||||||
|
lex.add_regex("a|b")
|
||||||
|
lex.set_input(string("blah"))
|
||||||
|
range(2).for_each(fun(i: int) { println(lex.next().to_string()); } )
|
||||||
|
}
|
||||||
|
println()
|
||||||
|
{
|
||||||
|
var lex.construct(): lexer
|
||||||
|
lex.add_regex("xyzzy")
|
||||||
|
lex.set_input(string("xyzzy"))
|
||||||
|
range(2).for_each(fun(i: int) { println(lex.next().to_string()); } )
|
||||||
|
}
|
||||||
|
println()
|
||||||
|
{
|
||||||
|
var lex.construct(): lexer
|
||||||
|
lex.add_regex("int")
|
||||||
|
lex.add_regex("(i|n|t|e)+")
|
||||||
|
lex.set_input(string("intent"))
|
||||||
|
range(2).for_each(fun(i: int) { println(lex.next().to_string()); } )
|
||||||
|
}
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user