Some more bugfixes, got regex working as well as the cpp version. (leaks memory like craaazy)
This commit is contained in:
@@ -52,33 +52,25 @@ obj regexState(Object) {
|
||||
|
||||
obj regex(Object) {
|
||||
var regexString: string::string
|
||||
var begin: regexState
|
||||
var begin: regexState*
|
||||
|
||||
fun construct(regexStringIn: string::string): regex* {
|
||||
begin.construct()
|
||||
regexString.copy_construct(&regexStringIn)
|
||||
|
||||
var beginningAndEnd = compile(regexStringIn)
|
||||
// add each beginning state as a next state of our begin, and the end state as the next state of each end
|
||||
beginningAndEnd.first.do(fun(it: regexState*, begin: regexState*): void { begin->next_states.add(it); }, &begin)
|
||||
// set our begin to the compiled start state, and add a new end state as a next state of each end state
|
||||
begin = beginningAndEnd.first
|
||||
beginningAndEnd.second.do(fun(it: regexState*, end: regexState*): void { it->next_states.add(end); }, mem::new<regexState>()->construct(conversions::to_char(1)))
|
||||
|
||||
io::print("begin: ")
|
||||
beginningAndEnd.first.do(fun(it: regexState*): void { io::print(it->character); })
|
||||
io::print("\nend: ")
|
||||
beginningAndEnd.second.do(fun(it: regexState*): void { io::print(it->character); })
|
||||
io::println()
|
||||
|
||||
return this
|
||||
}
|
||||
|
||||
// Copy-initializes this regex from *old: begin is taken from old->begin and
// regexString is copy-constructed from old->regexString.
// NOTE(review): this diff hunk lists two statements for begin. The same hunk shows the
// field changing from `regexState` to `regexState*`, so `begin.copy_construct(&old->begin)`
// is presumably the removed line and `begin = old->begin` the added one -- i.e. a plain
// pointer copy that shares the state graph with old. Confirm against the real file.
fun copy_construct(old:regex*):void {
|
||||
begin.copy_construct(&old->begin)
|
||||
begin = old->begin
|
||||
regexString.copy_construct(&old->regexString)
|
||||
}
|
||||
|
||||
// Destructs this regex's members.
// NOTE(review): this diff hunk shows both `begin.destruct()` and the commented-out
// `//begin->destruct()`. If the commented-out form is the current one (begin being a
// pointer in the new declaration), nothing here destructs or frees the states reachable
// from begin -- presumably part of the leak the commit message mentions. Confirm.
fun destruct():void {
|
||||
begin.destruct()
|
||||
//begin->destruct()
|
||||
regexString.destruct()
|
||||
}
|
||||
|
||||
@@ -87,7 +79,7 @@ obj regex(Object) {
|
||||
construct(other.regexString)
|
||||
}
|
||||
|
||||
fun compile(regex_string: string::string): util::pair<vector::vector<regexState*>, vector::vector<regexState*>> {
|
||||
fun compile(regex_string: string::string): util::pair<regexState*, vector::vector<regexState*>> {
|
||||
var first = mem::new<regexState>()->construct()
|
||||
var previous_begin = vector::vector<regexState*>()
|
||||
var previous_end = vector::vector<regexState*>()
|
||||
@@ -97,7 +89,7 @@ obj regex(Object) {
|
||||
var escapeing = false
|
||||
|
||||
for (var i = 0; i < regex_string.length(); i++;) {
|
||||
//io::print("i: "); io::println(i)
|
||||
//io::print("i: "); io::print(i); io::print(" : "); io::println(regex_string[i])
|
||||
if (regex_string[i] == '*' && !escapeing) {
|
||||
for (var j = 0; j < current_end.size; j++;)
|
||||
current_end[j]->next_states.add_all(current_begin)
|
||||
@@ -114,6 +106,31 @@ obj regex(Object) {
|
||||
} else if (regex_string[i] == '|' && !escapeing) {
|
||||
alternating = true
|
||||
} else if (regex_string[i] == '(' && !escapeing) {
|
||||
// note that we don't have a ')' case, as we skip past it with our indices
|
||||
var perenEnd = i + 1
|
||||
for (var depth = 1; depth > 0; perenEnd++;)
|
||||
if (regex_string[perenEnd] == '(')
|
||||
depth++
|
||||
else if (regex_string[perenEnd] == ')')
|
||||
depth--
|
||||
//io::print("unperened: ")
|
||||
//io::println(regex_string.slice(i+1, perenEnd-1))
|
||||
var innerBeginEnd = compile(regex_string.slice(i+1, perenEnd-1))
|
||||
// NOTE: perenEnd is one past the closing paren
|
||||
i = perenEnd-1
|
||||
|
||||
if (alternating) {
|
||||
previous_end.do(fun(it: regexState*, innerBegin: vector::vector<regexState*>):void { it->next_states.add_all(innerBegin); }, innerBeginEnd.first->next_states)
|
||||
current_begin.add_all(innerBeginEnd.first->next_states)
|
||||
current_end.add_all(innerBeginEnd.second)
|
||||
} else {
|
||||
current_end.do(fun(it: regexState*, innerBegin: vector::vector<regexState*>):void { it->next_states.add_all(innerBegin); }, innerBeginEnd.first->next_states)
|
||||
previous_begin = current_begin
|
||||
previous_end = current_end
|
||||
current_begin = innerBeginEnd.first->next_states
|
||||
current_end = innerBeginEnd.second
|
||||
}
|
||||
alternating = false
|
||||
} else if (regex_string[i] == '\\' && !escapeing) {
|
||||
escapeing = true
|
||||
} else {
|
||||
@@ -123,8 +140,8 @@ obj regex(Object) {
|
||||
current_begin.add(next)
|
||||
current_end.add(next)
|
||||
} else {
|
||||
current_end.do(fun(it: regexState*, next: regexState*):void { io::print("adding: "); io::print(next->character); io::print(" to "); io::println(it->character); it->next_states.add(next); }, next)
|
||||
//current_end.do(fun(it: regexState*, next: regexState*):void { it->next_states.add(next); }, next)
|
||||
current_end.do(fun(it: regexState*, next: regexState*):void { it->next_states.add(next); }, next)
|
||||
//current_end.do(fun(it: regexState*, next: regexState*):void { io::print("adding: "); io::print(next->character); io::print(" to "); io::println(it->character); it->next_states.add(next); }, next)
|
||||
//io::print("previous_begin size before current: "); io::println(previous_begin.size)
|
||||
//io::print("current_begin size before current: "); io::println(current_begin.size)
|
||||
previous_begin = current_begin
|
||||
@@ -134,33 +151,27 @@ obj regex(Object) {
|
||||
//io::print("current_begin size after current: "); io::println(current_begin.size)
|
||||
current_end = vector::vector(next)
|
||||
}
|
||||
escapeing = false
|
||||
alternating = false
|
||||
}
|
||||
}
|
||||
var beginAndEnd = util::make_pair(first->next_states, current_end)
|
||||
mem::delete(first)
|
||||
var beginAndEnd = util::make_pair(first, current_end)
|
||||
return beginAndEnd
|
||||
}
|
||||
|
||||
|
||||
// Convenience overload: wraps to_match in a string::string and forwards to the
// string::string overload of long_match, returning its result unchanged.
fun long_match(to_match: char*): int { return long_match(string::string(to_match)); }
|
||||
fun long_match(to_match: string::string): int {
|
||||
var next = vector::vector(&begin)
|
||||
var longest = 0
|
||||
var next = vector::vector(begin)
|
||||
var longest = -1
|
||||
for (var i = 0; i < to_match.length(); i++;) {
|
||||
if (next.size == 0)
|
||||
return longest
|
||||
if (next.any_true(fun(state: regexState*):bool { return state->is_end(); }))
|
||||
longest = i
|
||||
//next = next.flatten_map<regexState*>(fun(state: regexState*): vector::vector<regexState*> { return state->match(to_match[i]); })
|
||||
//next = next.flatten_map<regexState*>(fun(state: regexState*): vector::vector<regexState*> { return state->match('a'); })
|
||||
//next = next.flatten_map(fun(state: regexState*): vector::vector<regexState*> { return state->match('a'); })
|
||||
next = next.flatten_map(fun(state: regexState*, c:char): vector::vector<regexState*> { return state->match(c); }, to_match[i])
|
||||
}
|
||||
/*io::println("ok, ending with")*/
|
||||
/*for (var i = 0; i < next.size; i++;) {*/
|
||||
/*io::println(next[i]->character)*/
|
||||
/*io::println(conversions::to_int(next[i]->next_states[0]->character))*/
|
||||
/*}*/
|
||||
if (next.any_true(fun(state: regexState*):bool { return state->is_end(); }))
|
||||
return to_match.length()
|
||||
return longest
|
||||
|
||||
@@ -39,6 +39,10 @@ obj string (Object) {
|
||||
}
|
||||
|
||||
// Returns the character at position index of data; this method itself does no bounds checking.
fun operator[](index: int): char { return data[index]; }
|
||||
// Returns a new string constructed from data.slice(first, second).
// NOTE(review): presumably data is a vector, so the range is half-open [first, second)
// and negative bounds follow the vector slice convention shown elsewhere in this
// commit -- confirm against the declaration of data.
fun slice(first: int, second: int): string {
|
||||
var new.construct(data.slice(first,second)): string
|
||||
return new
|
||||
}
|
||||
// Returns the number of elements held in data (data.size).
fun length():int { return data.size; }
|
||||
|
||||
fun operator=(str: char*): void {
|
||||
|
||||
@@ -42,6 +42,7 @@ obj vector<T> (Object) {
|
||||
// Releases the backing storage if there is any, then zeroes the data pointer.
fun destruct(): void {
|
||||
if (data)
|
||||
// delete is given both the pointer and size; presumably it cleans up size elements -- helper not shown here
delete(data, size);
|
||||
//data = 1337
|
||||
// zero the pointer so a later destruct() fails the `if (data)` test and skips the delete
data = 0
|
||||
}
|
||||
|
||||
@@ -84,6 +85,17 @@ obj vector<T> (Object) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns a new vector containing data[start] through data[end - 1], in order.
// A negative bound has size + 1 added to it, so -1 maps to size (one past the last
// element), -2 maps to size - 1, and so on; slice(0, -1) therefore copies everything.
// The bounds are not clamped or validated here: the loop reads data[i] for every i in
// [start, end), and produces an empty vector when start >= end.
fun slice(start: int, end: int): vector<T> {
|
||||
var new.construct(): vector<T>
|
||||
if (start < 0)
|
||||
start += size + 1
|
||||
if (end < 0)
|
||||
end += size + 1
|
||||
for (var i = start; i < end; i++;)
|
||||
new.add(data[i])
|
||||
return new
|
||||
}
|
||||
|
||||
// Alias for get(index).
fun at(index: int): T { return get(index); }
|
||||
// Indexing operator; forwards to get(index).
fun operator[](index: int): T { return get(index); }
|
||||
fun get(index: int): T {
|
||||
|
||||
Reference in New Issue
Block a user