Some more bug fixes; got regex working as well as the C++ version (though it leaks memory like crazy).

This commit is contained in:
Nathan Braswell
2015-06-14 18:13:52 -04:00
parent f60148054f
commit 7b6e47544a
9 changed files with 184 additions and 40 deletions

View File

@@ -52,33 +52,25 @@ obj regexState(Object) {
obj regex(Object) {
var regexString: string::string
var begin: regexState
var begin: regexState*
fun construct(regexStringIn: string::string): regex* {
begin.construct()
regexString.copy_construct(&regexStringIn)
var beginningAndEnd = compile(regexStringIn)
// add each beginning state as a next state of our begin, and the end state as the next state of each end
beginningAndEnd.first.do(fun(it: regexState*, begin: regexState*): void { begin->next_states.add(it); }, &begin)
// init our begin, and the end state as the next state of each end
begin = beginningAndEnd.first
beginningAndEnd.second.do(fun(it: regexState*, end: regexState*): void { it->next_states.add(end); }, mem::new<regexState>()->construct(conversions::to_char(1)))
io::print("begin: ")
beginningAndEnd.first.do(fun(it: regexState*): void { io::print(it->character); })
io::print("\nend: ")
beginningAndEnd.second.do(fun(it: regexState*): void { io::print(it->character); })
io::println()
return this
}
fun copy_construct(old:regex*):void {
begin.copy_construct(&old->begin)
begin = old->begin
regexString.copy_construct(&old->regexString)
}
fun destruct():void {
begin.destruct()
//begin->destruct()
regexString.destruct()
}
@@ -87,7 +79,7 @@ obj regex(Object) {
construct(other.regexString)
}
fun compile(regex_string: string::string): util::pair<vector::vector<regexState*>, vector::vector<regexState*>> {
fun compile(regex_string: string::string): util::pair<regexState*, vector::vector<regexState*>> {
var first = mem::new<regexState>()->construct()
var previous_begin = vector::vector<regexState*>()
var previous_end = vector::vector<regexState*>()
@@ -97,7 +89,7 @@ obj regex(Object) {
var escapeing = false
for (var i = 0; i < regex_string.length(); i++;) {
//io::print("i: "); io::println(i)
//io::print("i: "); io::print(i); io::print(" : "); io::println(regex_string[i])
if (regex_string[i] == '*' && !escapeing) {
for (var j = 0; j < current_end.size; j++;)
current_end[j]->next_states.add_all(current_begin)
@@ -114,6 +106,31 @@ obj regex(Object) {
} else if (regex_string[i] == '|' && !escapeing) {
alternating = true
} else if (regex_string[i] == '(' && !escapeing) {
// note that we don't have a ')' case, as we skip past it with our indices
var perenEnd = i + 1
for (var depth = 1; depth > 0; perenEnd++;)
if (regex_string[perenEnd] == '(')
depth++
else if (regex_string[perenEnd] == ')')
depth--
//io::print("unperened: ")
//io::println(regex_string.slice(i+1, perenEnd-1))
var innerBeginEnd = compile(regex_string.slice(i+1, perenEnd-1))
// NOTE: perenEnd is one past the closing paren
i = perenEnd-1
if (alternating) {
previous_end.do(fun(it: regexState*, innerBegin: vector::vector<regexState*>):void { it->next_states.add_all(innerBegin); }, innerBeginEnd.first->next_states)
current_begin.add_all(innerBeginEnd.first->next_states)
current_end.add_all(innerBeginEnd.second)
} else {
current_end.do(fun(it: regexState*, innerBegin: vector::vector<regexState*>):void { it->next_states.add_all(innerBegin); }, innerBeginEnd.first->next_states)
previous_begin = current_begin
previous_end = current_end
current_begin = innerBeginEnd.first->next_states
current_end = innerBeginEnd.second
}
alternating = false
} else if (regex_string[i] == '\\' && !escapeing) {
escapeing = true
} else {
@@ -123,8 +140,8 @@ obj regex(Object) {
current_begin.add(next)
current_end.add(next)
} else {
current_end.do(fun(it: regexState*, next: regexState*):void { io::print("adding: "); io::print(next->character); io::print(" to "); io::println(it->character); it->next_states.add(next); }, next)
//current_end.do(fun(it: regexState*, next: regexState*):void { it->next_states.add(next); }, next)
current_end.do(fun(it: regexState*, next: regexState*):void { it->next_states.add(next); }, next)
//current_end.do(fun(it: regexState*, next: regexState*):void { io::print("adding: "); io::print(next->character); io::print(" to "); io::println(it->character); it->next_states.add(next); }, next)
//io::print("previous_begin size before current: "); io::println(previous_begin.size)
//io::print("current_begin size before current: "); io::println(current_begin.size)
previous_begin = current_begin
@@ -134,33 +151,27 @@ obj regex(Object) {
//io::print("current_begin size after current: "); io::println(current_begin.size)
current_end = vector::vector(next)
}
escapeing = false
alternating = false
}
}
var beginAndEnd = util::make_pair(first->next_states, current_end)
mem::delete(first)
var beginAndEnd = util::make_pair(first, current_end)
return beginAndEnd
}
fun long_match(to_match: char*): int { return long_match(string::string(to_match)); }
fun long_match(to_match: string::string): int {
var next = vector::vector(&begin)
var longest = 0
var next = vector::vector(begin)
var longest = -1
for (var i = 0; i < to_match.length(); i++;) {
if (next.size == 0)
return longest
if (next.any_true(fun(state: regexState*):bool { return state->is_end(); }))
longest = i
//next = next.flatten_map<regexState*>(fun(state: regexState*): vector::vector<regexState*> { return state->match(to_match[i]); })
//next = next.flatten_map<regexState*>(fun(state: regexState*): vector::vector<regexState*> { return state->match('a'); })
//next = next.flatten_map(fun(state: regexState*): vector::vector<regexState*> { return state->match('a'); })
next = next.flatten_map(fun(state: regexState*, c:char): vector::vector<regexState*> { return state->match(c); }, to_match[i])
}
/*io::println("ok, ending with")*/
/*for (var i = 0; i < next.size; i++;) {*/
/*io::println(next[i]->character)*/
/*io::println(conversions::to_int(next[i]->next_states[0]->character))*/
/*}*/
if (next.any_true(fun(state: regexState*):bool { return state->is_end(); }))
return to_match.length()
return longest

View File

@@ -39,6 +39,10 @@ obj string (Object) {
}
// Returns the character stored at `index` by indexing `data` directly.
// This method performs no range check of its own.
fun operator[](index: int): char { return data[index]; }
// Returns a new string constructed from `data.slice(first, second)`.
// `first` and `second` are passed through unchanged, so the index
// convention (including any negative-index handling) is whatever
// `data.slice` implements.
fun slice(first: int, second: int): string {
var new.construct(data.slice(first,second)): string
return new
}
// Returns the length of the string, taken directly from `data.size`.
fun length():int { return data.size; }
fun operator=(str: char*): void {

View File

@@ -42,6 +42,7 @@ obj vector<T> (Object) {
// Releases the backing storage: when `data` is non-zero it is passed to
// `delete(data, size)`, and `data` is then reset to 0.
// Because of the reset, the `if (data)` guard skips the delete if
// destruct is called again on the same object.
fun destruct(): void {
if (data)
delete(data, size);
//data = 1337
data = 0
}
@@ -84,6 +85,17 @@ obj vector<T> (Object) {
return true;
}
// Builds and returns a new vector by adding `data[i]` for every i in the
// half-open range [start, end).
//
// Negative indices: a negative `start` or `end` is shifted by `size + 1`,
// so -1 maps to `size` (one past the last element). This means
// `slice(0, -1)` covers the whole vector.
//
// No bounds checks are made here: `data[i]` is read directly, so the
// caller is responsible for passing indices that resolve inside the vector.
// If start >= end after adjustment, the loop body never runs and the
// result is the freshly constructed vector.
fun slice(start: int, end: int): vector<T> {
var new.construct(): vector<T>
if (start < 0)
start += size + 1
if (end < 0)
end += size + 1
for (var i = start; i < end; i++;)
new.add(data[i])
return new
}
// Named accessor: forwards to `get(index)` and returns its result.
fun at(index: int): T { return get(index); }
// Subscript accessor: forwards to `get(index)`, same as `at`.
fun operator[](index: int): T { return get(index); }
fun get(index: int): T {