Changed regex to reference-count its internal structure instead of cloning it, because cloning took way too long. Added terminal decorators to grammer and lexer.
This commit is contained in:
@@ -4,6 +4,7 @@ import set
|
||||
import symbol
|
||||
import regex
|
||||
import io
|
||||
import util
|
||||
|
||||
fun split_into_words(gram_str: string::string): vector::vector<string::string> {
|
||||
var out.construct(): vector::vector<string>
|
||||
@@ -56,6 +57,7 @@ fun load_grammer(gram_str: string::string): grammer {
|
||||
/*})*/
|
||||
/*return gram*/
|
||||
split_into_words(gram_str).for_each(fun(word: string::string) {
|
||||
io::print("word: "); io::println(word)
|
||||
if (word == "=") {
|
||||
// do nothing
|
||||
} else if (word == "|") {
|
||||
@@ -70,9 +72,17 @@ fun load_grammer(gram_str: string::string): grammer {
|
||||
leftSide = symbol::symbol(word, true)
|
||||
else
|
||||
if (word[0] == '"') {
|
||||
rightSide.add(symbol::symbol(word.slice(1,-2), true))
|
||||
/*gram.regexs.add_unique(regex::regex(word.slice(1,-2)))*/
|
||||
gram.regexs.add(regex::regex(word.slice(1,-2)))
|
||||
// ok, we support both plain terminals "hia*"
|
||||
// and decorated terminals "hia*":hi_with_as
|
||||
// so first check to find the ending " and see if it's
|
||||
// the end of the string
|
||||
var last_quote = word.length()-1
|
||||
while(word[last_quote] != '"') last_quote--
|
||||
rightSide.add(symbol::symbol(word.slice(1,last_quote), true))
|
||||
if (last_quote != word.length()-1)
|
||||
gram.regexs.add(util::make_pair(word.slice(last_quote+2, -1), regex::regex(word.slice(1,last_quote))))
|
||||
else
|
||||
gram.regexs.add(util::make_pair(word, regex::regex(word.slice(1,last_quote))))
|
||||
} else {
|
||||
rightSide.add(symbol::symbol(word, false))
|
||||
}
|
||||
@@ -84,7 +94,7 @@ fun load_grammer(gram_str: string::string): grammer {
|
||||
|
||||
obj grammer (Object) {
|
||||
var rules: vector::vector<rule>
|
||||
var regexs: vector::vector<regex::regex>
|
||||
var regexs: vector::vector<util::pair<string::string, regex::regex>>
|
||||
|
||||
fun construct(): *grammer {
|
||||
rules.construct()
|
||||
@@ -107,7 +117,7 @@ obj grammer (Object) {
|
||||
var result = string::string("grammer rules:")
|
||||
rules.for_each( fun(i : rule) { result += string::string("\n\t") + i.to_string(); } )
|
||||
result += "\nregexs:"
|
||||
regexs.for_each( fun(i : regex::regex) { result += string::string("\n\t") + i.regexString; } )
|
||||
regexs.for_each( fun(i : util::pair<string::string, regex::regex>) { result += string::string("\n\t") + i.first + ": " + i.second.regexString; } )
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,13 +5,19 @@ import vector
|
||||
import util
|
||||
|
||||
fun lexer(regs: vector::vector<regex::regex>): lexer {
|
||||
return lexer(regs.map( fun(reg: regex::regex): util::pair<string::string, regex::regex> {
|
||||
return util::make_pair(reg.regexString,reg)
|
||||
}))
|
||||
}
|
||||
|
||||
fun lexer(regs: vector::vector<util::pair<string::string, regex::regex>>): lexer {
|
||||
var toRet.construct() :lexer
|
||||
regs.for_each( fun(reg: regex::regex) toRet.add_regex(reg); )
|
||||
regs.for_each( fun(reg: util::pair<string::string, regex::regex>) toRet.add_regex(reg); )
|
||||
return toRet
|
||||
}
|
||||
|
||||
obj lexer (Object) {
|
||||
var regs: vector::vector<regex::regex>
|
||||
var regs: vector::vector<util::pair<string::string, regex::regex>>
|
||||
var input: string::string
|
||||
var position: int
|
||||
fun construct(): *lexer {
|
||||
@@ -33,11 +39,17 @@ obj lexer (Object) {
|
||||
destruct()
|
||||
copy_construct(&old)
|
||||
}
|
||||
fun add_regex(newOne: regex::regex) {
|
||||
fun add_regex(name: string::string, newOne: regex::regex) {
|
||||
regs.add(util::make_pair(name,newOne))
|
||||
}
|
||||
fun add_regex(newOne: util::pair<string::string,regex::regex>) {
|
||||
regs.add(newOne)
|
||||
}
|
||||
fun add_regex(newOne: regex::regex) {
|
||||
regs.add(util::make_pair(newOne.regexString, newOne))
|
||||
}
|
||||
fun add_regex(newOne: *char) {
|
||||
regs.add(regex::regex(newOne))
|
||||
regs.add(util::make_pair(string::string(newOne), regex::regex(newOne)))
|
||||
}
|
||||
fun set_input(in: string::string) {
|
||||
input = in
|
||||
@@ -45,8 +57,8 @@ obj lexer (Object) {
|
||||
fun next(): symbol::symbol {
|
||||
if (position >= input.length())
|
||||
return symbol::symbol("$EOF$", true)
|
||||
var max = regs.map(fun(reg: regex::regex): util::pair<int, string::string> {
|
||||
return util::make_pair(reg.long_match(input.slice(position, -1)), reg.regexString); })
|
||||
var max = regs.map(fun(reg_pair: util::pair<string::string,regex::regex>): util::pair<int, string::string> {
|
||||
return util::make_pair(reg_pair.second.long_match(input.slice(position, -1)), reg_pair.first); })
|
||||
.max(fun(first: util::pair<int, string::string>, second: util::pair<int, string::string>): bool
|
||||
{ return first.first < second.first; })
|
||||
if (max.first < 0)
|
||||
|
||||
@@ -43,9 +43,16 @@ obj regexState (Object) {
|
||||
obj regex (Object) {
|
||||
var regexString: string::string
|
||||
var begin: *regexState
|
||||
var referenceCounter: *int
|
||||
|
||||
fun construct(): *regex {
|
||||
regexString.construct()
|
||||
return this
|
||||
}
|
||||
fun construct(regexStringIn: string::string): *regex {
|
||||
regexString.copy_construct(&regexStringIn)
|
||||
referenceCounter = mem::new<int>()
|
||||
*referenceCounter = 1
|
||||
|
||||
var beginningAndEnd = compile(regexStringIn)
|
||||
// init our begin, and the end state as the next state of each end
|
||||
@@ -56,9 +63,11 @@ obj regex (Object) {
|
||||
}
|
||||
|
||||
fun copy_construct(old:*regex):void {
|
||||
construct(old->regexString)
|
||||
/*begin = old->begin*/
|
||||
/*regexString.copy_construct(&old->regexString)*/
|
||||
regexString.copy_construct(&old->regexString)
|
||||
begin = old->begin
|
||||
referenceCounter = old->referenceCounter
|
||||
*referenceCounter += 1
|
||||
/*construct(old->regexString)*/
|
||||
/*begin = mem::safe_recursive_clone(old->begin, fun(it: *regexState, cloner: fun(*regexState):*regexState, register: fun(*regexState):void): void {*/
|
||||
/*var newOne = mem::new<regexState>()->construct(it->character)*/
|
||||
/*register(newOne)*/
|
||||
@@ -70,7 +79,11 @@ obj regex (Object) {
|
||||
|
||||
fun destruct():void {
|
||||
regexString.destruct()
|
||||
mem::safe_recursive_delete(begin, fun(it: *regexState): set::set<*regexState> { return set::from_vector(it->next_states); } )
|
||||
*referenceCounter -= 1
|
||||
if (*referenceCounter == 0) {
|
||||
mem::safe_recursive_delete(begin, fun(it: *regexState): set::set<*regexState> { return set::from_vector(it->next_states); } )
|
||||
mem::delete(referenceCounter)
|
||||
}
|
||||
}
|
||||
|
||||
fun operator==(other: regex):bool {
|
||||
|
||||
Reference in New Issue
Block a user